# Source: qpalma.git / scripts / qpalma_pipeline.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 2 of the License, or
7 # (at your option) any later version.
8 #
9 # Written (W) 2008 Fabio De Bona
10 # Copyright (C) 2008 Max-Planck-Society
11
12 #
13 # This file contains the main interface to the QPalma pipeline.
14 #
15
16 import os
17 import os.path
18 import pdb
19 import sys
20
21 from qpalma.gridtools import ApproximationTask,PreprocessingTask
22 from qpalma.gridtools import AlignmentTask,PostprocessingTask
23
24 from qpalma.DatasetUtils import generatePredictionDataset,generateTrainingDataset
25
26 from qpalma.SettingsParser import parseSettings
27
28 from qpalma.utils import logwrite
29
30
# Usage string shown (via assert message) when the CLI arguments are invalid.
Errormsg = """Usage is: python qpalma_pipeline.py predict|train <config filename>"""
32
33
class System:
   """
   This class wraps the outer loop of the qpalma project

   It is responsible for:

   - loading and checking the config file(s)
   - setting up the different pipeline modules
   - run the experiment and report the results

   """

   def __init__(self,filename):
      """
      Inititalize the system by loading and parsing the settings file to obtain
      all parameters.

      filename -- path to the QPalma configuration file; parseSettings reads
      and validates it, and the resulting settings dict is threaded through
      every pipeline stage below.
      """

      self.settings = parseSettings(filename)
      logwrite('Parsed settings system set up.',self.settings)


   def training(self):
      """
      This function is responsible for the whole training process. It first
      converts the data to the right format needed by QPalma for the training
      algorithm.
      """
      logwrite('Begin of training.\n',self.settings)

      # Step 1: preprocess the raw matches on the grid before dataset creation.
      # NOTE(review): TrainingPreprocessingTask is not imported in this file
      # (only ApproximationTask, PreprocessingTask, AlignmentTask and
      # PostprocessingTask are) — as written this line raises NameError at
      # runtime; confirm the intended class and its import.
      pre_task = TrainingPreprocessingTask(self.settings)
      # NOTE(review): lower-case createJobs()/submit()/checkIfTaskFinished()
      # differs from the CamelCase CreateJobs()/Submit()/CheckIfTaskFinished()
      # used on every other task in this file — verify against the gridtools
      # task API.
      pre_task.createJobs()
      pre_task.submit()
      pre_task.checkIfTaskFinished()

      # Step 2: convert the preprocessed matches into QPalma's training
      # dataset format.
      generateTrainingDataset(self.settings)

      # Now that we have a dataset we can perform accurate trainments
      # NOTE(review): TrainingTask is also not imported here — same NameError
      # problem as TrainingPreprocessingTask above.
      train_task = TrainingTask(self.settings)
      train_task.CreateJobs()
      train_task.Submit()
      train_task.CheckIfTaskFinished()

      logwrite('End of training.\n',self.settings)

   def prediction(self):
      """
      This function encapsulates all steps needed to perform a prediction. Given
      the parameter of the training and paths to a prediction set it will
      generate several output files containing the spliced alignments
      """

      logwrite('Begin of prediction.\n',self.settings)

      # Before creating a candidate spliced read dataset we have to first filter
      # the matches from the first seed finding run.
      # (This stage is currently disabled — kept here to document the full
      # intended pipeline.)

      #approx_task = ApproximationTask(self.settings)
      #approx_task.CreateJobs()
      #approx_task.Submit()
      #approx_task.CheckIfTaskFinished()

      # After filtering combine the filtered matches from the first run and the
      # found matches from the second run to a full dataset

      generatePredictionDataset(self.settings)
      #pre_task = PreprocessingTask(self.settings)
      #pre_task.CreateJobs()
      #pre_task.Submit()
      #pre_task.CheckIfTaskFinished()

      # Now that we have a dataset we can perform accurate alignments
      align_task = AlignmentTask(self.settings)
      align_task.CreateJobs()
      align_task.Submit()
      align_task.CheckIfTaskFinished()

      # The results of the above alignment step can be converted to a data format
      # needed for further postprocessing.
      post_task = PostprocessingTask(self.settings)
      post_task.CreateJobs()
      post_task.Submit()
      post_task.CheckIfTaskFinished()

      logwrite('End of prediction.\n',self.settings)
121
122
if __name__ == '__main__':
   # Command-line entry point: qpalma_pipeline.py predict|train <config file>.
   # Use explicit checks + sys.exit instead of assert: assert statements are
   # stripped when Python runs with -O, and bare sys.argv indexing would raise
   # IndexError on missing arguments instead of printing the usage message.
   if len(sys.argv) != 3:
      sys.stderr.write(Errormsg + '\n')
      sys.exit(1)

   mode = sys.argv[1]
   if mode not in ['predict', 'train']:
      sys.stderr.write(Errormsg + '\n')
      sys.exit(1)

   filename = sys.argv[2]
   if not os.path.exists(filename):
      sys.stderr.write(Errormsg + '\n')
      sys.exit(1)

   # creating system object
   system_obj = System(filename)

   # mode was validated above, so this two-way branch is exhaustive.
   if mode == 'predict':
      system_obj.prediction()
   else:
      system_obj.training()