+ extended pipeline code
[qpalma.git] / scripts / qpalma_pipeline.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 2 of the License, or
7 # (at your option) any later version.
8 #
9 # Written (W) 2008 Fabio De Bona
10 # Copyright (C) 2008 Max-Planck-Society
11
12 #
13 # This file contains the main interface to the QPalma pipeline.
14 #
15
16 import os
17 import os.path
18 import pdb
19 import sys
20
21 from qpalma.gridtools import ApproximationTask,PreprocessingTask
22 from qpalma.gridtools import AlignmentTask,PostprocessingTask
23
24 from qpalma.DatasetUtils import generatePredictionDataset,generateTrainingDataset
25
26 from qpalma.SettingsParser import parseSettings
27
28 from qpalma.utils import logwrite
29
30
# Usage text shown when the command line arguments are missing or invalid.
Errormsg = 'Usage is: python qpalma_pipeline.py predict|train <config filename>'
32
33
class System:
    """
    This class wraps the outer loop of the qpalma project.

    It is responsible for:

    - loading and checking the config file(s)
    - setting up the different pipeline modules
    - running the experiment and reporting the results
    """

    def __init__(self, filename):
        """
        Initialize the system by loading and parsing the settings file to
        obtain all parameters.

        filename -- path of the configuration file passed to parseSettings
        """
        self.settings = parseSettings(filename)
        logwrite('Parsed settings system set up.', self.settings)

    def _print_banner(self, message):
        """
        Print `message` (indented by three tabs and followed by an empty line)
        framed by two rules of 80 '#' characters.
        """
        # Single-argument print(...) behaves the same as the print statement
        # under Python 2, so the output is unchanged.
        print('#' * 80)
        print('\t\t\t%s\n' % message)
        print('#' * 80)

    def _run_task(self, task):
        """
        Drive one pipeline task through its three stages: CreateJobs(),
        Submit() and CheckIfTaskFinished().

        NOTE(review): CheckIfTaskFinished() presumably blocks until all jobs
        of the task are done -- confirm against qpalma.gridtools.
        """
        task.CreateJobs()
        task.Submit()
        task.CheckIfTaskFinished()

    def training(self):
        """
        This function is responsible for the whole training process. It first
        converts the data to the right format needed by QPalma for the training
        algorithm and then runs the training task itself.
        """
        # These two task classes were used here without ever being imported,
        # which made every call of training() fail with a NameError.
        # NOTE(review): they are assumed to live in qpalma.gridtools next to
        # the other task classes -- confirm. The import is kept local so that
        # the prediction mode does not depend on it.
        from qpalma.gridtools import TrainingPreprocessingTask, TrainingTask

        logwrite('Begin of training.\n', self.settings)

        self._print_banner('Starting approximation...')

        # Preprocessing step; uses the same CreateJobs/Submit/
        # CheckIfTaskFinished method names as all other tasks in this file.
        self._run_task(TrainingPreprocessingTask(self.settings))

        # Collect the data and create a pickled training set
        generateTrainingDataset(self.settings)

        # Now that we have a dataset we can perform training
        self._run_task(TrainingTask(self.settings))

        logwrite('End of training.\n', self.settings)

    def prediction(self):
        """
        This function encapsulates all steps needed to perform a prediction.
        Given the parameter of the training and paths to a prediction set it
        will generate several output files containing the spliced alignments.

        The steps are run in this order: approximation, dataset generation,
        alignment and postprocessing.
        """
        logwrite('Begin of prediction.\n', self.settings)

        self._print_banner('Starting approximation...')

        # Before creating a candidate spliced read dataset we have to first
        # filter the matches from the first seed finding run.
        self._run_task(ApproximationTask(self.settings))

        # A leftover sys.exit(0) used to terminate the program at this point,
        # which made all of the following steps unreachable.

        # After filtering combine the filtered matches from the first run and
        # the found matches from the second run to a full dataset
        self._print_banner('Starting dataset generation...')

        generatePredictionDataset(self.settings)

        self._print_banner('Starting alignments...')

        # Now that we have a dataset we can perform accurate alignments
        self._run_task(AlignmentTask(self.settings))

        self._print_banner('Postprocessing...')

        # The results of the above alignment step can be converted to a data
        # format needed for further postprocessing.
        self._run_task(PostprocessingTask(self.settings))

        logwrite('End of prediction.\n', self.settings)
144
145
if __name__ == '__main__':
    # Validate the command line explicitly instead of with assert statements:
    # asserts are stripped when Python runs with -O, and indexing sys.argv
    # without a length check raised an IndexError instead of showing the usage
    # text. sys.exit(<string>) prints the string to stderr and exits with
    # status 1. Additional trailing arguments are ignored as before.
    if len(sys.argv) < 3:
        sys.exit(Errormsg)

    mode = sys.argv[1]
    filename = sys.argv[2]

    if mode not in ('predict', 'train'):
        sys.exit(Errormsg)

    if not os.path.exists(filename):
        sys.exit('Config file not found: %s\n%s' % (filename, Errormsg))

    # creating system object
    system_obj = System(filename)

    # mode is guaranteed to be one of the two values checked above
    if mode == 'predict':
        system_obj.prediction()
    else:
        system_obj.training()