+ update makefiles to automatically fetch valid Python includes and libs
[qpalma.git] / qpalma / gridtools.py
index 54aae6d..6aeb856 100644
@@ -123,9 +123,8 @@ class ApproximationTask(ClusterTask):
 
       #run_dir  = '/fml/ag-raetsch/home/fabio/tmp/newest_run/alignment/run_enable_quality_scores_+_enable_splice_signals_+_enable_intron_length_+'
       #param_fname    = jp(run_dir,'param_526.pickle')
-      param_fname = self.settings['prediction_parameter_fn']
+      param_fname = self.settings['prediction_param_fn']
       #run_fname      = jp(run_dir,'run_obj.pickle')
-      run_fname = self.settings['run_fn']
 
       #result_dir = '/fml/ag-raetsch/home/fabio/tmp/vmatch_evaluation/main'
       result_dir = self.settings['approximation_dir']
@@ -140,7 +139,7 @@ class ApproximationTask(ClusterTask):
          result_fname   = jp(result_dir,'map.vm.part_%d'%idx)
          self.result_files.append(result_fname)
 
-         current_job = KybJob(gridtools.ApproximationTaskStarter,[run_fname,data_fname,param_fname,result_fname,self.settings])
+         current_job = KybJob(gridtools.ApproximationTaskStarter,[data_fname,param_fname,result_fname,self.settings])
          current_job.h_vmem = '25.0G'
          #current_job.express = 'True'
 
@@ -157,8 +156,8 @@ class ApproximationTask(ClusterTask):
       combine_files([combined_fn,self.settings['spliced_reads_fn']],'map.vm')
 
 
-def ApproximationTaskStarter(run_fname,data_fname,param_fname,result_fname,settings):
-   ph1 = PipelineHeuristic(run_fname,data_fname,param_fname,result_fname,settings)
+def ApproximationTaskStarter(data_fname,param_fname,result_fname,settings):
+   ph1 = PipelineHeuristic(data_fname,param_fname,result_fname,settings)
    ph1.filter()
 
    return 'finished filtering set %s.' % data_fname
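
With run_fname gone, the starter needs only the read split, the trained parameters, the result target, and the settings dictionary. A minimal sketch of calling the refactored starter by hand (which is what each grid job executes) follows; the pickled-settings path and file names are placeholders, and loading the settings via cPickle is an assumption, not something defined in this commit.

# Hypothetical stand-alone call of the refactored starter; all paths are
# placeholders and the settings dict must hold whatever PipelineHeuristic
# expects (its keys are not spelled out in this diff).
import cPickle
from qpalma.gridtools import ApproximationTaskStarter

settings = cPickle.load(open('/tmp/training_settings.pickle'))    # placeholder path

msg = ApproximationTaskStarter(
   '/tmp/approximation/map.vm.part_1',            # data_fname: one read split
   settings['prediction_param_fn'],               # param_fname, key used above
   '/tmp/approximation/map.vm.part_1.result',     # result_fname
   settings)
print msg
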
@@ -189,8 +188,6 @@ class AlignmentTask(ClusterTask):
 
       num_splits = self.settings['num_splits']
 
-      jp = os.path.join
-
       dataset_fn           = self.settings['prediction_dataset_fn']
       prediction_keys_fn   = self.settings['prediction_dataset_keys_fn']
 
@@ -238,12 +235,40 @@ class TrainingTask(ClusterTask):
    This class represents the cluster task of training QPalma.
    """
 
-   def __init__(self):
-      ClusterTask.__init__(self)
+   def __init__(self,settings):
+      ClusterTask.__init__(self,settings)
+
 
    def CreateJobs(self):
+      """
+
+      """
+
+      dataset_fn     = self.settings['training_dataset_fn']
+
+      set_name = 'training_set'
+
+      current_job = KybJob(gridtools.TrainingTaskStarter,[dataset_fn,self.settings,set_name])
+      current_job.h_vmem = '2.0G'
+      current_job.express = 'True'
+
+      print "job #1: ", current_job.nativeSpecification
+
+      self.functionJobs.append(current_job)
+
+      print 'Got %d job(s)' % len(self.functionJobs)
+
+   
+   def collectResults(self):
       pass
-      #cPickle.dump(settings,open(jp(,'training_settings.pickle''run_obj.pickle','w+'))
+
+
+def TrainingTaskStarter(dataset_fn,settings,set_name):
+   accessWrapper = DataAccessWrapper(settings)
+   seqInfo = SeqSpliceInfo(accessWrapper,settings['allowed_fragments'])
+   qp = QPalma(seqInfo)
+   qp.init_training(dataset_fn,settings,set_name)
+   return 'finished training of set %s.' % set_name
 
 
 class PostprocessingTask(ClusterTask):
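
The new TrainingTask queues exactly one KybJob, which in turn runs TrainingTaskStarter on the configured training dataset. A rough driver sketch is given below; as in the previous sketch, the pickled-settings path is a placeholder, and how ClusterTask ultimately dispatches the queued jobs is outside this diff.

# Hypothetical driver for the new task; TrainingTask.__init__ and CreateJobs
# are defined above, but the job dispatch machinery of ClusterTask is not
# shown in this commit.
import cPickle
from qpalma.gridtools import TrainingTask

settings = cPickle.load(open('/tmp/training_settings.pickle'))    # placeholder path

task = TrainingTask(settings)
task.CreateJobs()             # queues one KybJob wrapping TrainingTaskStarter
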
@@ -275,7 +300,7 @@ class PostprocessingTask(ClusterTask):
          self.result_files.append(result_fn)
 
          current_job = KybJob(gridtools.PostProcessingTaskStarter,[self.settings,chunk_fn,result_fn])
-         current_job.h_vmem = '15.0G'
+         current_job.h_vmem = '2.0G'
          current_job.express = 'True'
 
          print "job #1: ", current_job.nativeSpecification