+ added settings parser module
[qpalma.git] / scripts / qpalma_pipeline.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 2 of the License, or
7 # (at your option) any later version.
8 #
9 # Written (W) 2008 Fabio De Bona
10 # Copyright (C) 2008 Max-Planck-Society
11
12 #
13 # This file contains the main interface to the QPalma pipeline.
14 #
15
16 import os
17 import os.path
18 import pdb
19 import sys
20
21 from qpalma.gridtools import ApproximationTask,PreprocessingTask
22 from qpalma.gridtools import AlignmentTask,PostprocessingTask
23
24 from qpalma.settingsParser import parseSettings
25
26
# Usage message reported by the script entry point when the config file
# argument is not usable.
Errormsg = "Usage is: python qpalma_pipeline.py <config filename>"
28
29
class System:
    """
    This class wraps the outer loop of the qpalma project.

    It is responsible for:

    - loading and checking the config file(s)
    - setting up the different pipeline modules
    - running the experiment and reporting the results
    """

    def __init__(self, filename):
        """
        Initialize the system by loading and parsing the settings file to
        obtain all parameters.

        filename -- path of the settings file handed to parseSettings
        """

        # The parsed settings are handed unchanged to every pipeline task
        # created by training() and prediction().
        self.global_settings = parseSettings(filename)

    @staticmethod
    def _wait_until_finished(task, poll_interval=20):
        """
        Poll task.checkIfTaskFinished() until it no longer returns False,
        sleeping poll_interval seconds between two polls.
        """

        # Imported locally: the module header does not import time.
        import time

        # Deliberately "== False" rather than "not ...": a result of None
        # ends the wait instead of looping forever.
        while task.checkIfTaskFinished() == False:
            time.sleep(poll_interval)

    def training(self):
        """
        This function is responsible for the whole training process. It first
        converts the data to the right format needed by QPalma for the
        training algorithm.

        Creates and submits the training preprocessing jobs and then blocks
        until the task reports that it has finished.
        """

        # NOTE(review): TrainingPreprocessingTask is not imported in the
        # visible module header; presumably it belongs to qpalma.gridtools --
        # confirm and import it before enabling training.
        pre_task = TrainingPreprocessingTask(self.global_settings)
        pre_task.createJobs()
        pre_task.submit()
        self._wait_until_finished(pre_task)

    def prediction(self):
        """
        This function encapsulates all steps needed to perform a prediction.
        Given the parameters of the training and paths to a prediction set it
        will generate several output files containing the spliced alignments.
        """

        # Before creating a candidate spliced read dataset we have to first
        # filter the matches from the first seed finding run.

        approx_task = ApproximationTask(self.global_settings)
        approx_task.createJobs()
        approx_task.submit()
        approx_task.checkIfTaskFinished()

        # After filtering combine the filtered matches from the first run and
        # the found matches from the second run to a full dataset

        # NOTE(review): this exit ends the process right after the
        # approximation step, so every stage below is currently unreachable.
        # Kept as-is because it looks like a deliberate work-in-progress stop
        # -- confirm before removing.
        sys.exit(0)

        pre_task = PreprocessingTask(self.global_settings)
        pre_task.createJobs()
        pre_task.submit()
        pre_task.checkIfTaskFinished()

        # Now that we have a dataset we can perform the accurate alignments
        # for this data

        align_task = AlignmentTask(self.global_settings)
        align_task.createJobs()
        align_task.submit()
        align_task.checkIfTaskFinished()

        # The results of the above alignment step can be converted to a data
        # format needed for further postprocessing.

        post_task = PostprocessingTask(self.global_settings)
        post_task.createJobs()
        post_task.submit()
        post_task.checkIfTaskFinished()

        # Parenthesized form prints the same text on Python 2 and Python 3.
        print("Success!")
108
109
if __name__ == '__main__':
    # The config file name is the one required argument; without this check
    # a missing argument surfaces as a bare IndexError.
    if len(sys.argv) < 2:
        sys.exit(Errormsg)

    filename = sys.argv[1]

    # Explicit check instead of assert: assert statements are stripped when
    # Python runs with -O, which would silently skip the validation.
    if not os.path.exists(filename):
        sys.exit("Config file '%s' does not exist.\n%s" % (filename, Errormsg))

    # Load the settings and run the prediction pipeline.
    system_obj = System(filename)
    system_obj.prediction()
115
116 #system_obj.training()