+ added configuration file parsing and checking functions
[qpalma.git] / scripts / qpalma_pipeline.py
index 17d64e0..42e8c38 100644 (file)
 # This file contains the main interface to the QPalma pipeline.
 #
 
+import os
+import os.path
+import sys
+
 from optparse import OptionParser
 
 from qpalma.gridtools import ApproximationTask,PreprocessingTask
 from qpalma.gridtools import AlignmentTask,PostprocessingTask
 
+
+Errormsg = """Usage is: python qpalma_pipeline.py <config filename>"""
+
+
+"""
 def create_option_parser():
    parser = OptionParser()
 
@@ -31,13 +41,76 @@ def create_option_parser():
    parser.add_option("-xx", "--clear", action="store_false", dest="verbose", help="cleanup directories delete all created data")
 
    return parser
+"""
+
+jp = os.path.join
+
+def parseSettings(filename):
+   """
+   """
+
+   #global_settings = {\
+   #'result_dir':'/fml/ag-raetsch/...',\
+   #'read_ascii_data_fn':'/fml/ag-raetsch/...',\
+   #'num_splits':50
+   #'global_log_fn':'~/qpalma.log'
+   #}
+
+   global_settings = {}
+
+   for line in open(filename):
+      if not line.strip() or line.startswith('#'):
+         continue
+
+      key,val = line.strip().replace(' ','').split('=')
+      global_settings[key] = val
+
+   return global_settings
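+
+# A minimal sketch of the configuration format parseSettings expects; the
+# concrete paths below are illustrative placeholders, not shipped defaults:
+#
+#   result_dir          = /tmp/qpalma_run
+#   read_ascii_data_fn  = /tmp/reads.txt
+#   num_splits          = 50
+#   global_log_fn       = /tmp/qpalma.log
+#
+# For such a file parseSettings would return (all values are kept as strings):
+#   {'result_dir': '/tmp/qpalma_run', 'read_ascii_data_fn': '/tmp/reads.txt',
+#    'num_splits': '50', 'global_log_fn': '/tmp/qpalma.log'}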
+
+
+def makeSettings(global_settings):
+   """
+   
+   """
+
+   # first check whether the top level result directory exists
+   assert os.path.exists(global_settings['result_dir']), 'Error: You have to specify an existing result directory!'
+
+   result_dir = global_settings['result_dir']
+
+   # now create some subdirectories needed for the different steps performed by QPalma 
+   global_settings['approximation_dir'] = jp(result_dir,'approximation')
+   global_settings['preproc_dir']       = jp(result_dir,'preprocessing')
+   global_settings['postproc_dir']      = jp(result_dir,'postprocessing')
+   global_settings['prediction_dir']    = jp(result_dir,'prediction')
+   global_settings['training_dir']      = jp(result_dir,'training')
+
+   for dir_name in ['approximation_dir', 'preproc_dir', 'postproc_dir', 'prediction_dir', 'training_dir']:
+      try:
+         os.mkdir(global_settings[dir_name])
+      except OSError:
+         print 'Error: There was a problem creating the subdirectory: %s' % dir_name
+
+   try:
+      open(global_settings['global_log_fn'],'a').close()
+   except IOError:
+      print 'Error: There was a problem creating the logfile %s' % global_settings['global_log_fn']
+
+   return global_settings
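+
+# For result_dir = '/tmp/qpalma_run' (a hypothetical path) makeSettings would
+# create the following layout, plus the logfile named by 'global_log_fn':
+#
+#   /tmp/qpalma_run/approximation
+#   /tmp/qpalma_run/preprocessing
+#   /tmp/qpalma_run/postprocessing
+#   /tmp/qpalma_run/prediction
+#   /tmp/qpalma_run/training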
+
+
+def checkSettings(global_settings):
+   for key,val in global_settings.items():
+      if key.endswith('_fn') or key.endswith('_dir'):
+         assert os.path.exists(val), 'Error: Path/File %s with value %s does not seem to exist!' % (key,val)
+
+   return True
 
-global_settings = {\
-'experiment_dir':'/fml/ag-raetsch/...',\
-'read_ascii_data_fn':'/fml/ag-raetsch/...',\
-'num_splits':50
-'global_log_fn':'~/qpalma.log'
-}
 
 
 
@@ -53,11 +126,22 @@ class System:
 
    """
 
-   def __init__(self):
+   def __init__(self,filename):
       """
+      Initialize the system by loading and parsing the settings file to obtain
+      all parameters.
       """
-      parser = create_option_parser()
-      (options, args) = parser.parse_args()
+
+      #parser = create_option_parser()
+      #(options, args) = parser.parse_args()
+
+      global_settings = parseSettings(filename)
+      global_settings = makeSettings(global_settings)
+      assert checkSettings(global_settings), 'Check your settings: some entries were invalid!'
+
+      self.global_settings = global_settings
+
 
    def training(self):
       """
@@ -66,7 +150,7 @@ class System:
       algorithm.
       """
 
-      pre_task = TrainingPreprocessingTask(global_settings,run_specific_settings)
+      pre_task = TrainingPreprocessingTask(self.global_settings)
       pre_task.createJobs()
       pre_task.submit() 
       while pre_task.checkIfTaskFinished() == False:
@@ -83,7 +167,7 @@ class System:
       # Before creating a candidate spliced read dataset we have to first filter
       # the matches from the first seed finding run.
 
-      approx_task = ApproximationTask(config_obj)
+      approx_task = ApproximationTask(self.global_settings)
       approx_task.createJobs()
       approx_task.submit()
       approx_task.checkIfTaskFinished()
@@ -91,33 +175,33 @@ class System:
       # After filtering combine the filtered matches from the first run and the
       # found matches from the second run to a full dataset
 
-      pre_task = PreprocessingTask(...)
+      pre_task = PreprocessingTask(self.global_settings)
       pre_task.createJobs()
       pre_task.submit() 
-      while pre_task.checkIfTaskFinished() == False:
-         sleep(20)
+      pre_task.checkIfTaskFinished()
 
       # Now that we have a dataset we can perform the accurate alignments for this
       # data
 
-      align_task = AlignmentTask(...)
+      align_task = AlignmentTask(self.global_settings)
       align_task.createJobs()
       align_task.submit()
-      while align_task.checkIfTaskFinished() == False:
-         sleep(20)
+      align_task.checkIfTaskFinished()
 
       # The results of the above alignment step can be converted to a data format
       # needed for further postprocessing.
 
-      post_task = PostprocessingTask(...)
+      post_task = PostprocessingTask(self.global_settings)
       post_task.createJobs()
       post_task.submit()
-      while post_task.checkIfTaskFinished() == False:
-         sleep(20)
+      post_task.checkIfTaskFinished()
 
       print "Success!"
    
 
 if __name__ == '__main__':
-   system_obj = System() 
-   system_obj.run()
+   assert len(sys.argv) == 2, Errormsg
+   filename = sys.argv[1]
+   assert os.path.exists(filename), Errormsg
+   system_obj = System(filename)
+   #system_obj.prediction()
+   #system_obj.training()
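+
+# A minimal usage sketch, assuming the commented-out calls above are the
+# intended entry points; the config path is a hypothetical example:
+#
+#   python qpalma_pipeline.py /tmp/qpalma.conf
+#
+# or, when importing this script as a module:
+#
+#   from qpalma_pipeline import System
+#   system_obj = System('/tmp/qpalma.conf')  # parse, prepare and check all settings
+#   system_obj.prediction()                  # approximation, preprocessing, alignment, postprocessing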