+ added settings in the form of a global and a run specific part
[qpalma.git] / scripts / qpalma_pipeline.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 2 of the License, or
7 # (at your option) any later version.
8 #
9 # Written (W) 2008 Fabio De Bona
10 # Copyright (C) 2008 Max-Planck-Society
11
12 #
13 # This file contains the main interface to the QPalma pipeline.
14 #
15
16 from optparse import OptionParser
17
18 from qpalma.gridtools import ApproximationTask,PreprocessingTask
19 from qpalma.gridtools import AlignmentTask,PostprocessingTask
20
def create_option_parser():
    """
    Build the command line parser of the QPalma pipeline script.

    Recognised options:

    -c, --check_and_init   flag; stored as ``options.check_and_init``
    -r FILE, --run FILE    takes one argument; stored as ``options.run``
    -x, --clear            flag; stored as ``options.clear``

    optparse only accepts short options of the form ``-x`` (a dash plus one
    character), so the former ``-ci`` / ``-xx`` spellings made add_option()
    raise OptionError before any argument could be parsed.

    Returns the configured OptionParser instance.
    """
    parser = OptionParser()

    # Boolean switch: the help text describes an action, not a value.
    parser.add_option("-c", "--check_and_init",
                      action="store_true", dest="check_and_init",
                      default=False,
                      help="check configuration and initialize directories")

    # NOTE(review): the help text looks carried over from the optparse
    # documentation example -- confirm what FILE means for --run.
    parser.add_option("-r", "--run", dest="run",
                      help="write report to FILE", metavar="FILE")

    # Boolean switch with its own destination (previously it cleared an
    # unrelated "verbose" attribute).
    parser.add_option("-x", "--clear",
                      action="store_true", dest="clear", default=False,
                      help="cleanup directories delete all created data")

    return parser
34
# Settings handed to the pipeline tasks (see System.training).
# NOTE(review): 'global_log_fn' starts with '~', which is not expanded here --
# presumably the consumer calls os.path.expanduser(); verify before use.
global_settings = {
    'experiment_dir': '/fml/ag-raetsch/...',
    'read_ascii_data_fn': '/fml/ag-raetsch/...',
    'num_splits': 50,
    'global_log_fn': '~/qpalma.log',
}
41
42
43
class System:
    """
    This class wraps the outer loop of the qpalma project

    It is responsible for:

    - loading and checking the config file(s)
    - setting up the different pipeline modules
    - run the experiment and report the results

    """

    # Seconds to wait between two completion polls of a submitted task.
    poll_interval = 20

    def __init__(self):
        """
        Parse the command line.

        The parsed option values are kept as ``self.options`` and the
        remaining positional arguments as ``self.args``.
        """
        parser = create_option_parser()
        (self.options, self.args) = parser.parse_args()

    def _run_task(self, task):
        """
        Create the jobs of ``task``, submit them and block until the task
        reports that it has finished.
        """
        # Imported here so this class does not depend on a module-level
        # import of sleep.
        from time import sleep

        task.createJobs()
        task.submit()

        # Explicit comparison with False (not ``not ...``): the return type of
        # checkIfTaskFinished() is not visible here, and any other result --
        # including None -- must keep ending the wait.
        while task.checkIfTaskFinished() == False:
            sleep(self.poll_interval)

    def training(self):
        """
        This function is responsible for the whole training process. It first
        converts the data to the right format needed by QPalma for the training
        algorithm.
        """

        # NOTE(review): neither TrainingPreprocessingTask nor
        # run_specific_settings is imported or defined in this file -- confirm
        # where they come from.
        pre_task = TrainingPreprocessingTask(global_settings,run_specific_settings)
        self._run_task(pre_task)

    def prediction(self):
        """
        This function encapsulates all steps needed to perform a prediction. Given
        the parameter of the training and paths to a prediction set it will
        generate several output files containing the spliced alignments
        """

        # Before creating a candidate spliced read dataset we have to first filter
        # the matches from the first seed finding run. The next step consumes the
        # filtered matches, so wait for this task like for all the others.
        # NOTE(review): config_obj is not defined in this file -- confirm.
        approx_task = ApproximationTask(config_obj)
        self._run_task(approx_task)

        # After filtering combine the filtered matches from the first run and the
        # found matches from the second run to a full dataset
        # TODO: the "..." constructor arguments below are placeholders that
        # still have to be filled in.
        pre_task = PreprocessingTask(...)
        self._run_task(pre_task)

        # Now that we have a dataset we can perform the accurate alignments for this
        # data
        align_task = AlignmentTask(...)
        self._run_task(align_task)

        # The results of the above alignment step can be converted to a data format
        # needed for further postprocessing.
        post_task = PostprocessingTask(...)
        self._run_task(post_task)

        # Parenthesised single-argument form prints the same text under
        # Python 2 and Python 3.
        print("Success!")

119
120
if __name__ == '__main__':
    # Script entry point: build the pipeline driver and start it.
    # NOTE(review): the System class visible in this file defines no run()
    # method -- confirm it is provided before relying on this entry point.
    System().run()