ae24e4c13cf6df1e7c36bca02e0faa6e27cbbfa3
[qpalma.git] / scripts / qpalma_pipeline.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 2 of the License, or
7 # (at your option) any later version.
8 #
9 # Written (W) 2008 Fabio De Bona
10 # Copyright (C) 2008 Max-Planck-Society
11
12 #
13 # This file contains the main interface to the QPalma pipeline.
14 #
15
16 import os
17 import os.path
18 import pdb
19 import sys
20
21 from qpalma.gridtools import ApproximationTask,PreprocessingTask
22 from qpalma.gridtools import AlignmentTask,PostprocessingTask
23
24 from SettingsParser import parseSettings
25
26
# Usage hint attached to the config-file existence check in the script entry
# point at the bottom of this file.
Errormsg = "Usage is: python qpalma_pipeline.py <config filename>"
28
29
class System:
    """
    This class wraps the outer loop of the qpalma project.

    It is responsible for:

    - loading and checking the config file(s)
    - setting up the different pipeline modules
    - running the experiment and reporting the results
    """

    def __init__(self, filename):
        """
        Initialize the system by loading and parsing the settings file to
        obtain all parameters.

        filename -- path of the configuration file; it is handed unchanged to
                    parseSettings and the result is kept in
                    self.global_settings for all tasks.
        """
        self.global_settings = parseSettings(filename)

    def training(self):
        """
        This function is responsible for the whole training process. It first
        converts the data to the right format needed by QPalma for the
        training algorithm.

        The preprocessing task is created, submitted and then polled every
        20 seconds until it reports that it has finished.
        """
        # sleep() is not provided by the module-level imports of this file,
        # so the time module is imported here and time.sleep is used.
        import time

        # NOTE(review): TrainingPreprocessingTask is not among the names
        # imported at the top of this file -- confirm where it is defined.
        pre_task = TrainingPreprocessingTask(self.global_settings)
        pre_task.createJobs()
        pre_task.submit()

        # The explicit comparison is kept on purpose: only a result equal to
        # False keeps the polling loop alive.
        while pre_task.checkIfTaskFinished() == False:
            time.sleep(20)

    def prediction(self):
        """
        This function encapsulates all steps needed to perform a prediction.
        Given the parameters of the training and paths to a prediction set it
        is meant to generate several output files containing the spliced
        alignments.

        The intended order of the steps is: approximation, preprocessing,
        alignment, postprocessing. At present the process is terminated via
        sys.exit(0) directly after the approximation step, so the later steps
        are never executed.
        """
        # Before creating a candidate spliced read dataset we have to first
        # filter the matches from the first seed finding run.
        approx_task = ApproximationTask(self.global_settings)
        # NOTE(review): these method names are capitalised while every other
        # task in this class is driven through createJobs/submit/
        # checkIfTaskFinished -- confirm the spelling against qpalma.gridtools.
        approx_task.CreateJobs()
        approx_task.Submit()
        approx_task.CheckIfTaskFinished()

        # After filtering combine the filtered matches from the first run and
        # the found matches from the second run to a full dataset.

        # NOTE(review): this unconditional exit makes everything below
        # unreachable. It is left in place because removing it would change
        # what the script does; confirm whether it is a development leftover.
        sys.exit(0)

        pre_task = PreprocessingTask(self.global_settings)
        pre_task.createJobs()
        pre_task.submit()
        pre_task.checkIfTaskFinished()

        # Now that we have a dataset we can perform the accurate alignments
        # for this data.
        align_task = AlignmentTask(self.global_settings)
        align_task.createJobs()
        align_task.submit()
        align_task.checkIfTaskFinished()

        # The results of the above alignment step can be converted to a data
        # format needed for further postprocessing.
        post_task = PostprocessingTask(self.global_settings)
        post_task.createJobs()
        post_task.submit()
        post_task.checkIfTaskFinished()

        # Written as a call with a single string argument so that the output
        # is the same under Python 2 and the line also parses under Python 3.
        print("Success!")
107
108
if __name__ == '__main__':
    # Validate the command line explicitly: a missing argument would otherwise
    # surface as an IndexError, and an assert-based check is removed when
    # Python runs with -O. sys.exit with a string writes it to stderr and
    # ends the process with a non-zero status.
    if len(sys.argv) < 2 or not os.path.exists(sys.argv[1]):
        sys.exit(Errormsg)

    filename = sys.argv[1]
    system_obj = System(filename)
    system_obj.prediction()

    # Training is currently disabled; enable it with system_obj.training().