+ fixed minor inconsistencies in the code
author Fabio <fabio@congo.fml.local>
Wed, 1 Oct 2008 13:47:07 +0000 (15:47 +0200)
committer Fabio <fabio@congo.fml.local>
Wed, 1 Oct 2008 13:47:07 +0000 (15:47 +0200)
+ first version where all 4 pipeline steps are running automated

qpalma/OutputFormat.py
qpalma/SettingsParser.py
qpalma/gridtools.py
scripts/qpalma_main.py
scripts/qpalma_pipeline.py

index 41d730a..af7178c 100644 (file)
@@ -99,7 +99,7 @@ def getUniquePrediction(allPredictions):
    return allUniquePredictions 
 
 
-def recalculatePositions(chromo,start_pos,strand,start,ends,seqInfo,window_size):
+def recalculatePositions(chromo,start_pos,strand,starts,ends,seqInfo,window_size):
    """
    If we work on the negative strand we have to recalculate the indices other
    wise we just have to add an offset i.e. the start position.
@@ -133,7 +133,7 @@ def createAlignmentOutput(settings,chunk_fn,result_fn):
 
    # fetch the data needed
    accessWrapper = DataAccessWrapper(settings)
-   seqInfo = SeqSpliceInfo(accessWrapper,,settings['allowed_fragments'])
+   seqInfo = SeqSpliceInfo(accessWrapper,settings['allowed_fragments'])
 
    # make predictions unique
    allUniquePredictions = getUniquePrediction(allPredictions)
@@ -173,7 +173,7 @@ def createBlatOutput(current_prediction,settings):
    # fetch the data needed
    allowed_fragments = settings['allowed_fragments']
    accessWrapper     = DataAccessWrapper(settings)
-   seqInfo           = SeqSpliceInfo(accessWrapper,range(1,allowed_fragments ))
+   seqInfo           = SeqSpliceInfo(accessWrapper,allowed_fragments)
 
    # this is the total size of the seed region
    window_size = 2*settings['half_window_size']
@@ -199,7 +199,6 @@ def createBlatOutput(current_prediction,settings):
 
    if len(qExonSizes) != num_exons:
       print 'BUG exon sizes %d'%id
-      continue
 
    new_line = ''
    #try:
index 44f6c9f..ab05a03 100644 (file)
@@ -47,7 +47,7 @@ def makeSettings(settings):
    settings['alignment_dir'] = jp(result_dir, 'alignment')
 
    for dir_name in ['approximation_dir','dataset_dir', 'preproc_dir', 'postproc_dir',\
-     'prediction_dir', 'training_dir', 'alignment']:
+     'prediction_dir', 'training_dir', 'alignment_dir']:
       try:
          os.mkdir(settings[dir_name])
          continue
index d7fd5f0..83a5da8 100644 (file)
@@ -8,6 +8,7 @@
 
 import cPickle
 import math
+import time
 import os
 import os.path
 import pdb
@@ -32,6 +33,7 @@ from qpalma.sequence_utils import SeqSpliceInfo,DataAccessWrapper
 from qpalma_main import QPalma
 
 jp = os.path.join
+pjoin = lambda *args: reduce(lambda x,y: jp(x,y),args)
 
 
 class ClusterTask(Thread):
@@ -73,7 +75,7 @@ class ClusterTask(Thread):
       After creation of jobs this function submits them to the cluster.
       """
       self.sid, self.jobids = submit_jobs(self.functionJobs)
-      #self.processedFunctionJobs = process_jobs(self.functionJobs)
+      #self.processedFunctionJobs = process_jobs(self.functionJobs,local=True,maxNumThreads=1)
 
 
    def Restart(self,id):
@@ -90,32 +92,17 @@ class ClusterTask(Thread):
       completed successfully.
       """
 
-      #print 'checking whether jobs finished...'
-      #while not get_status(self.sid, self.jobids):
-      #   time.sleep(7)
-      #print 'collecting jobs'
-      #retjobs = collect_jobs(self.sid, self.jobids, self.functionJobs)
-
-      print 'checking whether finished'
-      while not get_status(self.sid, self.jobids):
-         time.sleep(10)
-
       print 'collecting jobs'
-      retjobs = collect_jobs(self.sid, self.jobids, self.functionJobs)
+      retjobs = collect_jobs(self.sid, self.jobids, self.functionJobs, True)
       print "ret fields AFTER execution on cluster"
       for (i, job) in enumerate(retjobs):
          print "Job #", i, "- ret: ", job.ret
 
       print '--------------'
 
-      #print "ret fields AFTER execution on cluster"
-      #for (i, job) in enumerate(self.processedFunctionJobs):
-      #   print "Job #", i, "- ret: ", job.ret
-
       self.collectResults()
 
 
-
 class ApproximationTask(ClusterTask):
    """
    This task represents the first step towards a valid QPalma dataset.
@@ -283,7 +270,7 @@ class PostprocessingTask(ClusterTask):
       self.result_files = []
       for chunk_fn in chunks_fn:
          chunk_name  = chunk_fn[:chunk_fn.find('.')]
-         result_fn   = jp(self.result_dir,'%s.align'%chunk_name)
+         result_fn   = jp(self.result_dir,'%s.%s'%(chunk_name,self.settings['output_format']))
          chunk_fn = jp(run_dir,chunk_fn) 
 
          self.result_files.append(result_fn)
@@ -298,7 +285,7 @@ class PostprocessingTask(ClusterTask):
 
 
    def collectResults(self):
-      combined_fn = jp(self.result_dir,'all_alignments.align')
+      combined_fn = jp(self.result_dir,'all_alignments.%s'%self.settings['output_format'])
       combine_files(self.result_files,combined_fn)
 
 
index e68d67d..b3349be 100644 (file)
@@ -467,7 +467,6 @@ class QPalma:
       self.plog("Training completed")
       cPickle.dump(param,open(name,'w+'))
       self.logfh.close()
-      sys.exit(0)
    
 
 ###############################################################################
@@ -605,7 +604,6 @@ class QPalma:
       mes =  'Problem ctr %d' % self.problem_ctr
       self.plog(mes+'\n')
       self.logfh.close()
-      sys.exit(0)
 
 
    def calc_alignment(self, dna, read, quality, don_supp, acc_supp, d, a, h, mmatrix, qualityPlifs):
index 6813b8c..20ed7b4 100644 (file)
@@ -99,13 +99,11 @@ class System:
       # Before creating a candidate spliced read dataset we have to first filter
       # the matches from the first seed finding run.
 
-      approx_task = ApproximationTask(self.settings)
-      approx_task.CreateJobs()
-      approx_task.Submit()
-      approx_task.CheckIfTaskFinished()
+      #approx_task = ApproximationTask(self.settings)
+      #approx_task.CreateJobs()
+      #approx_task.Submit()
+      #approx_task.CheckIfTaskFinished()
       
-      sys.exit(0)
-
       # After filtering combine the filtered matches from the first run and the
       # found matches from the second run to a full dataset
 
@@ -113,21 +111,17 @@ class System:
       print '\t\t\tStarting dataset generation...\n'
       print '#'*80
 
-      generatePredictionDataset(self.settings)
-      #pre_task = PreprocessingTask(self.settings)
-      #pre_task.CreateJobs()
-      #pre_task.Submit() 
-      #pre_task.CheckIfTaskFinished()
+      #generatePredictionDataset(self.settings)
 
       print '#'*80
       print '\t\t\tStarting alignments...\n'
       print '#'*80
 
       # Now that we have a dataset we can perform accurate alignments
-      align_task = AlignmentTask(self.settings)
-      align_task.CreateJobs()
-      align_task.Submit()
-      align_task.CheckIfTaskFinished()
+      #align_task = AlignmentTask(self.settings)
+      #align_task.CreateJobs()
+      #align_task.Submit()
+      #align_task.CheckIfTaskFinished()
 
       print '#'*80
       print '\t\t\tPostprocessing...\n'