7b4c7a661c71a36ab11a0b08903afbc3080b2b9a
[qpalma.git] / scripts / qpalma_pipeline.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 2 of the License, or
7 # (at your option) any later version.
8 #
9 # Written (W) 2008 Fabio De Bona
10 # Copyright (C) 2008 Max-Planck-Society
11
12 #
13 # This file contains the main interface to the QPalma pipeline.
14 #
15
16 import os
17 import os.path
18 import pdb
19 import sys
20
21 from qpalma.gridtools import ApproximationTask,PreprocessingTask
22 from qpalma.gridtools import AlignmentTask,PostprocessingTask
23
24 from qpalma.DatasetUtils import generatePredictionDataset,generateTrainingDataset
25
26 from SettingsParser import parseSettings
27
28
29 Errormsg = """Usage is: python qpalma_pipeline.py <config filename>"""
30
31
class System:
    """
    This class wraps the outer loop of the qpalma project

    It is responsible for:

    - loading and checking the config file(s)
    - setting up the different pipeline modules
    - running the experiment and reporting the results

    """

    def __init__(self, filename):
        """
        Initialize the system by loading and parsing the settings file to
        obtain all parameters.
        """

        self.settings = parseSettings(filename)


    def training(self):
        """
        This function is responsible for the whole training process. It first
        converts the data to the right format needed by QPalma for the training
        algorithm.
        """

        # BUGFIX: the original called ``TrainingPreprocessingTask``, a name
        # that is never imported anywhere in this file, so training() always
        # died with a NameError.  ``generateTrainingDataset`` *is* imported
        # (and was otherwise unused) and mirrors the way prediction() calls
        # ``generatePredictionDataset`` below — presumably the intended call;
        # TODO(review): confirm no extra grid-submission step is needed here.
        generateTrainingDataset(self.settings)


    def prediction(self):
        """
        This function encapsulates all steps needed to perform a prediction. Given
        the parameter of the training and paths to a prediction set it will
        generate several output files containing the spliced alignments
        """

        # Before creating a candidate spliced read dataset we have to first filter
        # the matches from the first seed finding run.
        # NOTE(review): the approximation stage is currently disabled; kept as
        # reference for the full pipeline.

        #approx_task = ApproximationTask(self.settings)
        #approx_task.CreateJobs()
        #approx_task.Submit()
        #approx_task.CheckIfTaskFinished()

        # After filtering combine the filtered matches from the first run and the
        # found matches from the second run to a full dataset
        generatePredictionDataset(self.settings)
        #pre_task = PreprocessingTask(self.settings)
        #pre_task.CreateJobs()
        #pre_task.Submit()
        #pre_task.CheckIfTaskFinished()

        # Now that we have a dataset we can perform accurate alignments
        align_task = AlignmentTask(self.settings)
        align_task.CreateJobs()
        align_task.Submit()
        align_task.CheckIfTaskFinished()

        # The results of the above alignment step can be converted to a data format
        # needed for further postprocessing.
        post_task = PostprocessingTask(self.settings)
        post_task.CreateJobs()
        post_task.Submit()
        post_task.CheckIfTaskFinished()

        # Single-argument print(...) is valid under both Python 2 and 3,
        # unlike the original ``print "Success!"`` statement form.
        print("Success!")
104
105
if __name__ == '__main__':
    # Validate the command line explicitly instead of letting sys.argv[1]
    # raise an IndexError, and instead of using ``assert`` (which is silently
    # stripped when Python runs with -O, removing the check entirely).
    if len(sys.argv) != 2:
        sys.exit(Errormsg)

    filename = sys.argv[1]

    if not os.path.exists(filename):
        # sys.exit with a string prints it to stderr and exits with status 1.
        sys.exit(Errormsg)

    system_obj = System(filename)
    system_obj.prediction()

    #system_obj.training()