+ fixed minor inconsistencies in the code
[qpalma.git] / scripts / qpalma_pipeline.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 2 of the License, or
7 # (at your option) any later version.
8 #
9 # Written (W) 2008 Fabio De Bona
10 # Copyright (C) 2008 Max-Planck-Society
11
12 #
13 # This file contains the main interface to the QPalma pipeline.
14 #
15
16 import os
17 import os.path
18 import pdb
19 import sys
20
21 from qpalma.gridtools import ApproximationTask,PreprocessingTask
22 from qpalma.gridtools import AlignmentTask,PostprocessingTask
23
24 from qpalma.DatasetUtils import generatePredictionDataset,generateTrainingDataset
25
26 from qpalma.SettingsParser import parseSettings
27
28 from qpalma.utils import logwrite
29
30
31 Errormsg = """Usage is: python qpalma_pipeline.py predict|train <config filename>"""
32
33
class System:
    """
    This class wraps the outer loop of the qpalma project.

    It is responsible for:

    - loading and checking the config file(s)
    - setting up the different pipeline modules
    - running the experiment and reporting the results
    """

    def __init__(self, filename):
        """
        Initialize the system by loading and parsing the settings file to
        obtain all parameters.

        filename -- path of the settings file handed to parseSettings().
        """
        self.settings = parseSettings(filename)
        logwrite('Parsed settings system set up.', self.settings)

    def _banner(self, text):
        """
        Print `text` (indented by three tabs and followed by an empty line)
        framed by two rules of 80 '#' characters.
        """
        rule = '#' * 80
        # Single-argument parenthesised print behaves the same under the
        # Python 2 print statement and the Python 3 print function.
        print(rule)
        print('\t\t\t%s\n' % text)
        print(rule)

    def training(self):
        """
        Run the whole training process: a preprocessing task, the generation
        of the (pickled) training dataset, and finally the training task
        itself. Start and end of the run are written to the log.
        """
        logwrite('Begin of training.\n', self.settings)

        self._banner('Starting approximation...')

        # NOTE(review): TrainingPreprocessingTask and TrainingTask are not
        # among the names imported at the top of this file; they must be
        # brought into scope (presumably from qpalma.gridtools - confirm)
        # before this method can run.
        pre_task = TrainingPreprocessingTask(self.settings)
        # Use the same capitalised task interface (CreateJobs / Submit /
        # CheckIfTaskFinished) as every other task in this file.
        pre_task.CreateJobs()
        pre_task.Submit()
        pre_task.CheckIfTaskFinished()

        # Collect the data and create a pickled training set
        generateTrainingDataset(self.settings)

        # Now that we have a dataset we can perform training
        train_task = TrainingTask(self.settings)
        train_task.CreateJobs()
        train_task.Submit()
        train_task.CheckIfTaskFinished()

        logwrite('End of training.\n', self.settings)

    def prediction(self):
        """
        This function encapsulates all steps needed to perform a prediction.
        Given the parameters of the training and paths to a prediction set it
        will generate several output files containing the spliced alignments.

        The approximation, dataset generation and alignment steps are
        currently commented out, so only their banners are printed and only
        the postprocessing task is actually executed.
        """
        logwrite('Begin of prediction.\n', self.settings)

        self._banner('Starting approximation...')

        # Before creating a candidate spliced read dataset we have to first
        # filter the matches from the first seed finding run.

        #approx_task = ApproximationTask(self.settings)
        #approx_task.CreateJobs()
        #approx_task.Submit()
        #approx_task.CheckIfTaskFinished()

        # After filtering combine the filtered matches from the first run and
        # the found matches from the second run to a full dataset

        self._banner('Starting dataset generation...')

        #generatePredictionDataset(self.settings)

        self._banner('Starting alignments...')

        # Now that we have a dataset we can perform accurate alignments
        #align_task = AlignmentTask(self.settings)
        #align_task.CreateJobs()
        #align_task.Submit()
        #align_task.CheckIfTaskFinished()

        self._banner('Postprocessing...')

        # The results of the above alignment step can be converted to a data
        # format needed for further postprocessing.
        post_task = PostprocessingTask(self.settings)
        post_task.CreateJobs()
        post_task.Submit()
        post_task.CheckIfTaskFinished()

        logwrite('End of prediction.\n', self.settings)
138
139
if __name__ == '__main__':
    # Validate the command line explicitly instead of with assert:
    # assertions are removed under `python -O`, and indexing sys.argv
    # without a length check raises a bare IndexError when arguments are
    # missing. sys.exit(<str>) prints the message to stderr and exits with
    # status 1.
    if len(sys.argv) < 3:
        sys.exit(Errormsg)

    mode = sys.argv[1]
    filename = sys.argv[2]

    if mode not in ('predict', 'train'):
        sys.exit(Errormsg)

    if not os.path.exists(filename):
        sys.exit('Config file not found: %s\n%s' % (filename, Errormsg))

    # creating system object
    system_obj = System(filename)

    # mode was validated above, so it is either 'predict' or 'train'.
    if mode == 'predict':
        system_obj.prediction()
    else:
        system_obj.training()