+ added dataset generation function for training set
[qpalma.git] / scripts / qpalma_pipeline.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 2 of the License, or
7 # (at your option) any later version.
8 #
9 # Written (W) 2008 Fabio De Bona
10 # Copyright (C) 2008 Max-Planck-Society
11
12 #
13 # This file contains the main interface to the QPalma pipeline.
14 #
15
16 import os
17 import os.path
18 import pdb
19 import sys
20
21 from qpalma.gridtools import ApproximationTask,PreprocessingTask
22 from qpalma.gridtools import AlignmentTask,PostprocessingTask
23
24 from qpalma.DatasetUtils import generateDataset
25
26 from SettingsParser import parseSettings
27
28
# Usage hint printed/raised when the script is invoked without a valid
# configuration-file argument (see the __main__ guard at the bottom).
Errormsg = """Usage is: python qpalma_pipeline.py <config filename>"""
30
31
32 class System:
33 """
34 This class wraps the outer loop of the qpalma project
35
36 It is responsible for:
37
38 - loading and checking the config file(s)
39 - setting up the different pipeline modules
40 - run the experiment and report the results
41
42 """
43
44 def __init__(self,filename):
45 """
46 Inititalize the system by loading and parsing the settings file to obtain
47 all parameters.
48 """
49
50 self.global_settings = parseSettings(filename)
51
52
53 def training(self):
54 """
55 This function is responsible for the whole training process. It first
56 converts the data to the right format needed by QPalma for the training
57 algorithm.
58 """
59
60 pre_task = TrainingPreprocessingTask(self.global_settings)
61 pre_task.createJobs()
62 pre_task.submit()
63 pre_task.checkIfTaskFinished()
64
65
66 def prediction(self):
67 """
68 This function encapsulates all steps needed to perform a prediction. Given
69 the parameter of the training and paths to a prediction set it will
70 generate several output files containing the spliced alignments
71 """
72
73 # Before creating a candidate spliced read dataset we have to first filter
74 # the matches from the first seed finding run.
75
76 approx_task = ApproximationTask(self.global_settings)
77 approx_task.CreateJobs()
78 approx_task.Submit()
79 approx_task.CheckIfTaskFinished()
80
81 # After filtering combine the filtered matches from the first run and the
82 # found matches from the second run to a full dataset
83
84 generateDataset(self.global_settings)
85 #pre_task = PreprocessingTask(self.global_settings)
86 #pre_task.CreateJobs()
87 #pre_task.Submit()
88 #pre_task.CheckIfTaskFinished()
89
90 sys.exit(0)
91
92 # Now that we have a dataset we can perform the accurate alignments for this
93 # data
94
95 align_task = AlignmentTask(self.global_settings)
96 align_task.CreateJobs()
97 align_task.Submit()
98 align_task.CheckIfTaskFinished()
99
100 # The results of the above alignment step can be converted to a data format
101 # needed for further postprocessing.
102
103 post_task = PostprocessingTask(self.global_settings)
104 post_task.CreateJobs()
105 post_task.Submit()
106 post_task.CheckIfTaskFinished()
107
108 print "Success!"
109
110
111 if __name__ == '__main__':
112 filename = sys.argv[1]
113 assert os.path.exists(filename), Errormsg
114 system_obj = System(filename)
115 system_obj.prediction()
116
117 #system_obj.training()