fixed minor inconsistencies in the code (naming of new variables etc.)
[qpalma.git] / scripts / qpalma_pipeline.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 2 of the License, or
7 # (at your option) any later version.
8 #
9 # Written (W) 2008 Fabio De Bona
10 # Copyright (C) 2008 Max-Planck-Society
11
12 #
13 # This file contains the main interface to the QPalma pipeline.
14 #
15
16 import os
17 import os.path
18 import pdb
19 import sys
20
21 from qpalma.gridtools import ApproximationTask,PreprocessingTask
22 from qpalma.gridtools import AlignmentTask,PostprocessingTask
23
24 from qpalma.DatasetUtils import generatePredictionDataset,generateTrainingDataset
25
26 from qpalma.SettingsParser import parseSettings
27
28
# Usage text shown to the user when the script is started without a usable
# configuration file argument.
Errormsg = "Usage is: python qpalma_pipeline.py <config filename>"
30
31
class System:
    """
    This class wraps the outer loop of the qpalma project.

    It is responsible for:

    - loading and checking the config file(s)
    - setting up the different pipeline modules
    - running the experiment and reporting the results
    """

    def __init__(self, filename):
        """
        Initialize the system by loading and parsing the settings file to
        obtain all parameters.

        filename -- path of the configuration file handed to parseSettings.
        """
        self.settings = parseSettings(filename)

    @staticmethod
    def _run_task(task):
        """
        Drive one pipeline task through its three stages, in order: create
        its jobs, submit them, then check whether the task has finished.
        """
        task.CreateJobs()
        task.Submit()
        task.CheckIfTaskFinished()

    def training(self):
        """
        This function is responsible for the whole training process. It first
        converts the data to the right format needed by QPalma for the
        training algorithm and then runs the training task itself.
        """
        # These two classes are not part of the module-level imports, so the
        # original code failed with a NameError here. Import them locally so
        # that prediction-only runs are unaffected.
        # NOTE(review): assumed to live in qpalma.gridtools next to the other
        # task classes -- confirm.
        from qpalma.gridtools import TrainingPreprocessingTask, TrainingTask

        # Preprocessing stage. It uses the same CreateJobs / Submit /
        # CheckIfTaskFinished method names as every other task in this class
        # (the original used differently-cased names for this one task only).
        self._run_task(TrainingPreprocessingTask(self.settings))

        # Build the training dataset from the current settings.
        generateTrainingDataset(self.settings)

        # Now that we have a dataset we can run the actual training.
        self._run_task(TrainingTask(self.settings))

    def prediction(self):
        """
        This function encapsulates all steps needed to perform a prediction.
        Given the parameters of the training and paths to a prediction set it
        will generate several output files containing the spliced alignments.

        Prints "Success!" once all stages have run.
        """
        # Before creating a candidate spliced read dataset we have to first
        # filter the matches from the first seed finding run.
        # (This stage is currently disabled.)
        #self._run_task(ApproximationTask(self.settings))

        # After filtering, combine the filtered matches from the first run and
        # the found matches from the second run to a full dataset.
        generatePredictionDataset(self.settings)
        # (The separate preprocessing stage is currently disabled.)
        #self._run_task(PreprocessingTask(self.settings))

        # Now that we have a dataset we can perform accurate alignments.
        self._run_task(AlignmentTask(self.settings))

        # The results of the above alignment step can be converted to a data
        # format needed for further postprocessing.
        self._run_task(PostprocessingTask(self.settings))

        # Parenthesised form prints the same text on Python 2 and also parses
        # on Python 3.
        print("Success!")
114
115
if __name__ == '__main__':
    # Validate the command line explicitly: an assert would be stripped when
    # running under "python -O", and indexing sys.argv without a length check
    # fails with a bare IndexError instead of the usage message.
    if len(sys.argv) < 2:
        sys.exit(Errormsg)

    filename = sys.argv[1]
    if not os.path.exists(filename):
        sys.exit("Config file not found: %s\n%s" % (filename, Errormsg))

    system_obj = System(filename)
    system_obj.prediction()

    # Training is currently disabled; enable the call below to run it.
    #system_obj.training()