+ restructured test cases
[qpalma.git] / scripts / qpalma_pipeline.py
index 5e4bea2..bcb2a12 100644 (file)
@@ -21,12 +21,14 @@ import sys
 from qpalma.gridtools import ApproximationTask,PreprocessingTask
 from qpalma.gridtools import AlignmentTask,PostprocessingTask
 
-from qpalma.DatasetUtils import generateDataset
+from qpalma.DatasetUtils import generatePredictionDataset,generateTrainingDataset
 
-from SettingsParser import parseSettings
+from qpalma.SettingsParser import parseSettings
 
+from qpalma.utils import logwrite
 
-Errormsg = """Usage is: python qpalma_pipeline.py <config filename>"""
+
+Errormsg = """Usage is: python qpalma_pipeline.py predict|train <config filename>"""
 
 
 class System:
@@ -47,7 +49,8 @@ class System:
       all parameters.
       """
 
-      self.global_settings = parseSettings(filename)
+      self.settings = parseSettings(filename)
+      logwrite('Parsed settings system set up.',self.settings)
 
 
    def training(self):
@@ -56,12 +59,30 @@ class System:
       converts the data to the right format needed by QPalma for the training
       algorithm.
       """
+      logwrite('Begin of training.\n',self.settings)
+
+      print '#'*80
+      print '\t\t\tStarting training preprocessing...\n'
+      print '#'*80
 
-      pre_task = TrainingPreprocessingTask(self.global_settings)
+      # When we are given only genomic reads we first generate artificially spliced
+      # ones in order to generate a training set
+      pre_task = TrainingPreprocessingTask(self.settings)
       pre_task.createJobs()
       pre_task.submit() 
       pre_task.checkIfTaskFinished()
 
+      # Collect the data and create a pickled training set
+      generateTrainingDataset(self.settings)
+
+      # Now that we have a dataset we can perform training
+      train_task = TrainingTask(self.settings)
+      train_task.CreateJobs()
+      train_task.Submit()
+      train_task.CheckIfTaskFinished()
+
+      logwrite('End of training.\n',self.settings)
+
 
    def prediction(self):
       """
@@ -70,10 +91,16 @@ class System:
       generate several output files containing the spliced alignments
       """
 
+      logwrite('Begin of prediction.\n',self.settings)
+
+      print '#'*80
+      print '\t\t\tStarting approximation...\n'
+      print '#'*80
+
       # Before creating a candidate spliced read dataset we have to first filter
       # the matches from the first seed finding run.
 
-      approx_task = ApproximationTask(self.global_settings)
+      approx_task = ApproximationTask(self.settings)
       approx_task.CreateJobs()
       approx_task.Submit()
       approx_task.CheckIfTaskFinished()
@@ -81,37 +108,48 @@ class System:
       # After filtering combine the filtered matches from the first run and the
       # found matches from the second run to a full dataset
 
-      generateDataset(self.global_settings)
-      #pre_task = PreprocessingTask(self.global_settings)
-      #pre_task.CreateJobs()
-      #pre_task.Submit() 
-      #pre_task.CheckIfTaskFinished()
+      print '#'*80
+      print '\t\t\tStarting dataset generation...\n'
+      print '#'*80
 
-      sys.exit(0)
+      generatePredictionDataset(self.settings)
 
-      # Now that we have a dataset we can perform the accurate alignments for this
-      # data
+      print '#'*80
+      print '\t\t\tStarting alignments...\n'
+      print '#'*80
 
-      align_task = AlignmentTask(self.global_settings)
+      # Now that we have a dataset we can perform accurate alignments
+      align_task = AlignmentTask(self.settings)
       align_task.CreateJobs()
       align_task.Submit()
       align_task.CheckIfTaskFinished()
 
+      print '#'*80
+      print '\t\t\tPostprocessing...\n'
+      print '#'*80
+
       # The results of the above alignment step can be converted to a data format
       # needed for further postprocessing.
-
-      post_task = PostprocessingTask(self.global_settings)
+      post_task = PostprocessingTask(self.settings)
       post_task.CreateJobs()
       post_task.Submit()
       post_task.CheckIfTaskFinished()
 
-      print "Success!"
+      logwrite('End of prediction.\n',self.settings)
    
 
 if __name__ == '__main__':
-   filename = sys.argv[1]
+   mode     = sys.argv[1]
+   assert mode in ['predict','train'], Errormsg
+   filename = sys.argv[2]
    assert os.path.exists(filename), Errormsg
+
+   # creating system object
    system_obj = System(filename)
-   system_obj.prediction()
 
-   #system_obj.training()
+   if mode == 'predict':
+      system_obj.prediction()
+   elif mode == 'train':
+      system_obj.training()
+   else:
+      assert False