+ extended pipeline code
author Fabio <fabio@congo.fml.local>
Mon, 29 Sep 2008 13:31:48 +0000 (15:31 +0200)
committer Fabio <fabio@congo.fml.local>
Mon, 29 Sep 2008 13:31:48 +0000 (15:31 +0200)
+ refactoring last step of the prediction task

qpalma/DatasetUtils.py
qpalma/OutputFormat.py
qpalma/SettingsParser.py
qpalma/gridtools.py
qpalma/utils.py
scripts/qpalma_main.py
scripts/qpalma_pipeline.py

index 31a156e..1d90744 100644 (file)
@@ -55,13 +55,13 @@ def generatePredictionDataset(settings):
    """
 
    #map_file = settings['map_file']
-   map_file = jp(settings['approximation_dir'],'map.vm')
+   map_file = jp(settings['approximation_dir'],'map.vm.spliced')
    assert os.path.exists(map_file), 'Error: Can not find map file'
 
    dataset = {}
 
    prb_offset = 64
-   prb_offset = 50
+   #prb_offset = 50
 
    # This tuple specifies an interval for valid Illumina Genome Analyzer quality values
    if settings['platform'] == 'IGA':
@@ -77,6 +77,9 @@ def generatePredictionDataset(settings):
    seqInfo = SeqSpliceInfo(accessWrapper,settings['allowed_fragments'])
 
    for line in open(map_file):
+      if line.startswith('#'):
+         continue
+
       if instance_counter > 0 and instance_counter % 5000 == 0:
          print 'processed %d examples' % instance_counter
 
index 6624161..4e9dd23 100644 (file)
@@ -18,12 +18,14 @@ from qpalma.utils import pprint_alignment
 from qpalma.sequence_utils import SeqSpliceInfo,DataAccessWrapper
 
 
+translation = '-acgtn_' # how aligned est and dna sequence is displayed (gap_char, 5 nucleotides, intron_char) 
+
+
 def alignment_reconstruct(current_prediction,num_exons):
    """
    We reconstruct the exact alignment to infer the positions of indels and the
    sizes of the respective exons.
    """
-   translation = '-acgtn_' # how aligned est and dna sequence is displayed (gap_char, 5 nucleotides, intron_char) 
 
    SpliceAlign = current_prediction['spliceAlign']
    estAlign    = current_prediction['estAlign']
@@ -66,31 +68,14 @@ def alignment_reconstruct(current_prediction,num_exons):
    return exonIdentities,exonGaps
 
 
-def createGenefindingOutput(current_loc,out_fname):
+def getUniquePrediction(allPredictions):
    """
+   Since one read can have several alignments due to several possible seed
+   positions we have to preprocess all prediction. This function return for
+   every read only one alignment that is the highest scoring under the QPalma
+   model.
    """
-   pass
-
-def createShoreOutput(current_loc,out_fname):
-   """
-   """
-   pass
-
 
-def createBLATOutput(current_loc,out_fname):
-
-   out_fh = open(out_fname,'w+')
-   allPredictions = cPickle.load(open(current_loc))
-   #allPredictions = current_loc
-
-   print 'Loaded %d examples' % len(allPredictions)
-
-   # fetch the data needed
-   accessWrapper = DataAccessWrapper(settings)
-   seqInfo = SeqSpliceInfo(accessWrapper,,settings['allowed_fragments'])
-
-   # Uniqueness filtering using alignment score for reads with several
-   # alignments
    allUniquePredictions = {}
 
    for new_prediction in allPredictions:
@@ -107,130 +92,188 @@ def createBLATOutput(current_loc,out_fname):
       else:
          allUniquePredictions[id] = new_prediction
 
-   written_lines = 0
-   for pred_idx,current_prediction in enumerate(allUniquePredictions.values()):
+   return allUniquePredictions 
 
-      id = current_prediction['id']
 
-      seq         = current_prediction['read']
-      dna         = current_prediction['dna']
+def createOutput(current_loc,out_fname,output_format):
+   """
+   Given all the predictions and a result file name. This function calculates
+   all aligmnment information and returns the result in the specified format.
+   """
 
-      # CHECK !!!
-      q1          = 'zzz'
+   out_fh = open(out_fname,'w+')
+   allPredictions = cPickle.load(open(current_loc))
+   #allPredictions = current_loc
 
-      chromo      = current_prediction['chr']
-      strand      = current_prediction['strand']
-      start_pos   = current_prediction['start_pos']
-      alignment   = current_prediction['alignment']
+   print 'Loaded %d examples' % len(allPredictions)
+
+   # fetch the data needed
+   accessWrapper = DataAccessWrapper(settings)
+   seqInfo = SeqSpliceInfo(accessWrapper,settings['allowed_fragments'])
 
-      DPScores    = current_prediction['DPScores']
-      predExons   = current_prediction['predExons']
-      predExons   = [e+start_pos for e in predExons]
+   # make predictions unique
+   allUniquePredictions = getUniquePrediction(allPredictions)
 
-      (qStart, qEnd, tStart, tEnd, num_exons, qExonSizes, qStarts, qEnds,\
-      tExonSizes,tStarts, tEnds) = alignment
+   written_lines = 0
+   for pred_idx,current_prediction in enumerate(allUniquePredictions.values()):
 
-      if len(qExonSizes) != num_exons:
-         print 'BUG exon sizes %d'%id
-         continue
+      if output_format == 'BLAT':
+         # BLAT output
+         new_line = createBlatOutput(current_prediction)
 
-      EST2GFF = False
-      new_line = ''
-
-      if EST2GFF:
-         predExons = numpy.mat(predExons).reshape(len(predExons)/2,2)
-
-         match = predExons[0,1] - predExons[0,0] 
-         if predExons.shape == (2,2):
-            match += predExons[1,1] - predExons[1,0] 
-
-         mismatch = 0
-         repmatch = 0
-         Ns = 0
-         Qgapcnt = 0
-         Qgapbs = 0
-
-         Tgapcnt = 0
-         Tgapbs = 0
-
-         if predExons.shape == (2,2):
-            Tgapbs += predExons[1,0] - predExons[0,1]
-            Tgapcnt = 1
-
-         # &strand, Qname, &Qsize, 
-         # &Qstart, &Qend, Tname, &Tsize,
-         # &Tstart, &Tend, &blockcnt,exonsizes_str,Qstarts_str,Tstarts_str
-         #
-         # ATTENTION
-         #  
-         # we enforce equal exons sizes for q and t because we are missing indel
-         # positions of the alignment
-
-         if qExonSizes[0] != tExonSizes[0]:
-            continue
-
-         Qname = '%d(%2.2f)'%(id,DPScores.tolist()[0][0])
-         Qsize = len(seq)
-         Qstart = qStart
-         Qend = qEnd
-         Tname = 'CHR%d'%chromo
-
-         start_pos -= 2
-
-         Tsize = tEnd+1 + start_pos
-         Tstart =  tStart + start_pos
-         Tend = tEnd + start_pos
-         blockcnt = Tgapcnt+1
-         exonsizes_str = str(tExonSizes)[1:-1].replace(' ','')
-         Qstarts_str = str(qStarts)[1:-1].replace(' ','')
-         Tstarts_str = str(map(lambda x: x+start_pos,tStarts))[1:-1].replace(' ','')
-
-         new_line = "%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%c\t%s\t%d\t%d\t%d\t%s\t%d\t%d\t%d\t%d\t%s\t%s\t%s\n"\
-         % (match, mismatch, repmatch, Ns, Qgapcnt, Qgapbs,\
-         Tgapcnt, Tgapbs, strand, Qname, Qsize, Qstart, Qend,\
-         Tname, Tsize, Tstart, Tend,\
-         blockcnt,exonsizes_str,Qstarts_str,Tstarts_str) #str(predExons))
+      elif output_format == 'ShoRe':
+         # ShoRe output
+         new_line = createShoReOutput(current_prediction)
+
+      elif output_format == 'mGene':
+         # Genefinding output for mGene
+         new_line = createGenefindingOutput(current_prediction)
 
       else:
-         #try:
-         exonIdentities,exonGaps = alignment_reconstruct(current_prediction,num_exons)
-         #except:
-         #   print 'Bug in example %d (idx: %d)' %\
-         #   (current_prediction['id'],pred_idx)
-         #   continue
-
-         #t_size = tEnd - tStart
-         #read_size = 38
-         #if strand == '-':
-         #   total_size = seqInfo.chromo_sizes[chromo]
-
-         #   start_pos = total_size - start_pos
-
-         #   qExonSizes.reverse()
-         #   qStarts  = [read_size-e for e in qEnds]
-         #   qStarts.reverse()
-         #   qEnds  = [read_size-e for e in qStarts]
-         #   qEnds.reverse()
-
-         #   tExonSizes.reverse()
-         #   tStarts  = [t_size-e for e in tEnds]
-         #   tStarts.reverse()
-         #   tEnds    = [t_size-e for e in tStarts]
-         #   tEnds.reverse()
-
-         #   exonIdentities.reverse()
-         #   exonGaps.reverse()
-
-         pp = lambda x : str(x)[1:-1].replace(' ','')
-
-         new_line = '%d\t%d\t%s\t%s\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' %\
-         (id,chromo,strand,seq,str(q1)[1:-1],start_pos,qStart,qEnd,tStart,tEnd,num_exons,\
-         pp(qExonSizes),pp(qStarts),\
-         pp(qEnds),pp(tExonSizes),\
-         pp(tStarts),pp(tEnds),\
-         pp(exonIdentities),pp(exonGaps))
+         assert False
 
       out_fh.write(new_line)
       written_lines += 1
 
    print 'Wrote out %d lines' % written_lines
+
+
+def createBlatOutput(current_prediction):
+   """
+   This function takes one prediction as input and returns the corresponding
+   alignment in BLAT format.
+   """
+
+   id = current_prediction['id']
+
+   seq         = current_prediction['read']
+   dna         = current_prediction['dna']
+
+   # we do not really need the quality values here
+   q1          = 'zzz'
+
+   chromo      = current_prediction['chr']
+   strand      = current_prediction['strand']
+   start_pos   = current_prediction['start_pos']
+   alignment   = current_prediction['alignment']
+
+   DPScores    = current_prediction['DPScores']
+   predExons   = current_prediction['predExons']
+   predExons   = [e+start_pos for e in predExons]
+
+   (qStart, qEnd, tStart, tEnd, num_exons, qExonSizes, qStarts, qEnds,\
+   tExonSizes,tStarts, tEnds) = alignment
+
+   if len(qExonSizes) != num_exons:
+      print 'BUG exon sizes %d'%id
+      return ''
+
+   EST2GFF = False
+   new_line = ''
+
+   if EST2GFF:
+      predExons = numpy.mat(predExons).reshape(len(predExons)/2,2)
+
+      match = predExons[0,1] - predExons[0,0] 
+      if predExons.shape == (2,2):
+         match += predExons[1,1] - predExons[1,0] 
+
+      mismatch = 0
+      repmatch = 0
+      Ns = 0
+      Qgapcnt = 0
+      Qgapbs = 0
+
+      Tgapcnt = 0
+      Tgapbs = 0
+
+      if predExons.shape == (2,2):
+         Tgapbs += predExons[1,0] - predExons[0,1]
+         Tgapcnt = 1
+
+      # &strand, Qname, &Qsize, 
+      # &Qstart, &Qend, Tname, &Tsize,
+      # &Tstart, &Tend, &blockcnt,exonsizes_str,Qstarts_str,Tstarts_str
+      #
+      # ATTENTION
+      #  
+      # we enforce equal exons sizes for q and t because we are missing indel
+      # positions of the alignment
+
+      if qExonSizes[0] != tExonSizes[0]:
+         return ''
+
+      Qname = '%d(%2.2f)'%(id,DPScores.tolist()[0][0])
+      Qsize = len(seq)
+      Qstart = qStart
+      Qend = qEnd
+      Tname = 'CHR%d'%chromo
+
+      start_pos -= 2
+
+      Tsize = tEnd+1 + start_pos
+      Tstart =  tStart + start_pos
+      Tend = tEnd + start_pos
+      blockcnt = Tgapcnt+1
+      exonsizes_str = str(tExonSizes)[1:-1].replace(' ','')
+      Qstarts_str = str(qStarts)[1:-1].replace(' ','')
+      Tstarts_str = str(map(lambda x: x+start_pos,tStarts))[1:-1].replace(' ','')
+
+      new_line = "%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%c\t%s\t%d\t%d\t%d\t%s\t%d\t%d\t%d\t%d\t%s\t%s\t%s\n"\
+      % (match, mismatch, repmatch, Ns, Qgapcnt, Qgapbs,\
+      Tgapcnt, Tgapbs, strand, Qname, Qsize, Qstart, Qend,\
+      Tname, Tsize, Tstart, Tend,\
+      blockcnt,exonsizes_str,Qstarts_str,Tstarts_str) #str(predExons))
+
+   else:
+      #try:
+      exonIdentities,exonGaps = alignment_reconstruct(current_prediction,num_exons)
+      #except:
+      #   print 'Bug in example %d (idx: %d)' %\
+      #   (current_prediction['id'],pred_idx)
+      #   continue
+
+      #t_size = tEnd - tStart
+      #read_size = 38
+      #if strand == '-':
+      #   total_size = seqInfo.chromo_sizes[chromo]
+
+      #   start_pos = total_size - start_pos
+
+      #   qExonSizes.reverse()
+      #   qStarts  = [read_size-e for e in qEnds]
+      #   qStarts.reverse()
+      #   qEnds  = [read_size-e for e in qStarts]
+      #   qEnds.reverse()
+
+      #   tExonSizes.reverse()
+      #   tStarts  = [t_size-e for e in tEnds]
+      #   tStarts.reverse()
+      #   tEnds    = [t_size-e for e in tStarts]
+      #   tEnds.reverse()
+
+      #   exonIdentities.reverse()
+      #   exonGaps.reverse()
+
+      pp = lambda x : str(x)[1:-1].replace(' ','')
+
+      new_line = '%d\t%d\t%s\t%s\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' %\
+      (id,chromo,strand,seq,str(q1)[1:-1],start_pos,qStart,qEnd,tStart,tEnd,num_exons,\
+      pp(qExonSizes),pp(qStarts),\
+      pp(qEnds),pp(tExonSizes),\
+      pp(tStarts),pp(tEnds),\
+      pp(exonIdentities),pp(exonGaps))
+   
+   return new_line
+
+
+def createGenefindingOutput(current_prediction):
+   """
+   """
+   return ''
+
+
+def createShoReOutput(current_prediction):
+   """
+   """
+   return ''
index dd56766..44f6c9f 100644 (file)
@@ -44,9 +44,10 @@ def makeSettings(settings):
    settings['postproc_dir'] = jp(result_dir, 'postprocessing')
    settings['prediction_dir'] = jp(result_dir, 'prediction')
    settings['training_dir'] = jp(result_dir, 'training')
+   settings['alignment_dir'] = jp(result_dir, 'alignment')
 
    for dir_name in ['approximation_dir','dataset_dir', 'preproc_dir', 'postproc_dir',\
-     'prediction_dir', 'training_dir']:
+     'prediction_dir', 'training_dir', 'alignment_dir']:
       try:
          os.mkdir(settings[dir_name])
          continue
index 912e93f..49bfc16 100644 (file)
@@ -28,7 +28,11 @@ from PipelineHeuristic import *
 
 import gridtools
 
-from utils import get_slices,split_file
+from utils import get_slices,split_file,combine_files
+
+from qpalma.sequence_utils import SeqSpliceInfo,DataAccessWrapper
+
+from qpalma_main import QPalma
 
 jp = os.path.join
 
@@ -71,8 +75,8 @@ class ClusterTask(Thread):
       """
       After creation of jobs this function submits them to the cluster.
       """
-      for current_job in self.functionJobs:
-         self.sid, self.jobids = submit_jobs([current_job])
+      self.sid, self.jobids = submit_jobs(self.functionJobs)
+      #self.processedFunctionJobs = process_jobs(self.functionJobs)
 
 
    def Restart(self,id):
@@ -89,16 +93,28 @@ class ClusterTask(Thread):
       completed successfully.
       """
 
+      #print 'checking whether jobs finished...'
+      #while not get_status(self.sid, self.jobids):
+      #   time.sleep(7)
+      #print 'collecting jobs'
+      #retjobs = collect_jobs(self.sid, self.jobids, self.functionJobs)
+
       print 'checking whether finished'
       while not get_status(self.sid, self.jobids):
-         time.sleep(7)
+         time.sleep(10)
+
       print 'collecting jobs'
       retjobs = collect_jobs(self.sid, self.jobids, self.functionJobs)
+      print "ret fields AFTER execution on cluster"
+      for (i, job) in enumerate(retjobs):
+         print "Job #", i, "- ret: ", job.ret
+
+      print '--------------'
 
       #print "ret fields AFTER execution on cluster"
-      #for (i, job) in enumerate(retjobs):
+      #for (i, job) in enumerate(self.processedFunctionJobs):
       #   print "Job #", i, "- ret: ", job.ret
-      #print '--------------'
+
       self.collectResults()
 
 
@@ -137,7 +153,7 @@ class ApproximationTask(ClusterTask):
 
       for idx in range(0,num_splits):
          data_fname     = jp(result_dir,'map.part_%d'%idx)
-         result_fname   = jp(result_dir,'map.vm.part_%d.heuristic'%idx)
+         result_fname   = jp(result_dir,'map.vm.part_%d.heuristic.spliced'%idx)
          self.result_files.append(result_fname)
 
          current_job = KybJob(gridtools.ApproximationTaskStarter,[run_fname,data_fname,param_fname,result_fname,self.settings])
@@ -153,7 +169,7 @@ class ApproximationTask(ClusterTask):
       result_dir  = self.settings['approximation_dir']
       combined_fn = jp(result_dir,'map.vm.spliced')
       combine_files(self.result_files,combined_fn)
-      combine_files([combined_fn,settings['spliced_reads_fn']],'map.vm')
+      combine_files([combined_fn,self.settings['spliced_reads_fn']],'map.vm')
 
 
 def ApproximationTaskStarter(run_fname,data_fname,param_fname,result_fname,settings):
@@ -193,7 +209,7 @@ class AlignmentTask(ClusterTask):
       run   = cPickle.load(open(self.settings['run_fn']))
       run['name'] = 'saved_run'
 
-      param = self.settings['prediction_parameter_fn']
+      param_fn = self.settings['prediction_parameter_fn']
 
       run['result_dir']    = self.settings['prediction_dir']
       dataset_fn           = self.settings['prediction_dataset_fn']
@@ -211,7 +227,7 @@ class AlignmentTask(ClusterTask):
             chunks.append((c_name,prediction_keys[slice[0]:slice[1]]))
 
       for c_name,current_chunk in chunks:
-         current_job = KybJob(gridtools.TaskStarter,[run,dataset_fn,current_chunk,param,c_name])
+         current_job = KybJob(gridtools.AlignmentTaskStarter,[self.settings,run,dataset_fn,current_chunk,param_fn,c_name])
          current_job.h_vmem = '20.0G'
          #current_job.express = 'True'
 
@@ -226,14 +242,14 @@ class AlignmentTask(ClusterTask):
       print 'Got %d job(s)' % len(self.functionJobs)
 
 
-def TaskStarter(run,dataset_fn,prediction_keys,param,set_name):
+def AlignmentTaskStarter(settings,run,dataset_fn,prediction_keys,param_fn,set_name):
    """
   
    """
-
-   qp = QPalma()
-   qp.predict(run,dataset_fn,prediction_keys,param,set_name)
-
+   accessWrapper = DataAccessWrapper(settings)
+   seqInfo = SeqSpliceInfo(accessWrapper,settings['allowed_fragments'])
+   qp = QPalma(run,seqInfo)
+   qp.init_prediction(dataset_fn,prediction_keys,param_fn,set_name)
    return 'finished prediction of set %s.' % set_name
 
 
@@ -252,8 +268,8 @@ class PostprocessingTask(ClusterTask):
    After QPalma predicted alignments this task postprocesses the data.
    """
 
-   def __init__(self):
-      ClusterTask.__init__(self)
+   def __init__(self,settings):
+      ClusterTask.__init__(self,settings)
 
 
    def CreateJobs(self):
@@ -275,19 +291,21 @@ class PostprocessingTask(ClusterTask):
 
          self.result_files.append(result_fn)
 
-         current_job = KybJob(grid_alignment.TaskStarter,[chunk_fn,result_fn])
+         current_job = KybJob(gridtools.PostProcessingTaskStarter,[chunk_fn,result_fn])
          current_job.h_vmem = '15.0G'
          current_job.express = 'True'
 
          print "job #1: ", current_job.nativeSpecification
 
+         self.functionJobs.append(current_job)
+
 
    def collectResults(self):
       combined_fn = jp(self.result_dir,'all_alignments.align')
       combine_files(self.result_files,combined_fn)
 
 
-def TaskStarter(chunk_fn,result_fn):
+def PostProcessingTaskStarter(chunk_fn,result_fn):
    create_alignment_file(chunk_fn,result_fn)
 
 
@@ -300,7 +318,7 @@ class DummyTask(ClusterTask):
       ClusterTask.__init__(self)
 
 
-   def TaskStarter(param):
+   def DummyTaskStarter(param):
       create_alignment_file(chunk_fn,result_fn)
 
 
@@ -313,7 +331,7 @@ class DummyTask(ClusterTask):
          result_fn   = jp(result_dir,'%s.align_remap'%chunk_name)
          chunk_fn = jp(run_dir,chunk_fn) 
 
-         current_job = KybJob(grid_alignment.g_alignment,[chunk_fn,result_fn])
+         current_job = KybJob(grid_alignment.DummyTaskStarter,[chunk_fn,result_fn])
          current_job.h_vmem = '15.0G'
          current_job.express = 'True'
 
index a91db33..f80f7ad 100644 (file)
@@ -2,6 +2,7 @@
 # -*- coding: utf-8 -*-
 
 import pdb
+import sys
 import os.path
 
 from numpy.matlib import mat,zeros,ones,inf
index e70e6d2..0bad5d9 100644 (file)
@@ -32,7 +32,7 @@ from qpalma.computeSpliceAlignWithQuality import *
 from qpalma.TrainingParam import Param
 from qpalma.Plif import Plf,compute_donacc
 
-from Utils import pprint_alignment, get_alignment
+from qpalma.utils import pprint_alignment, get_alignment
 
 class SpliceSiteException:
    pass
index 6730974..6813b8c 100644 (file)
@@ -61,16 +61,20 @@ class System:
       """
       logwrite('Begin of training.\n',self.settings)
 
+      print '#'*80
+      print '\t\t\tStarting approximation...\n'
+      print '#'*80
+
       # 
       pre_task = TrainingPreprocessingTask(self.settings)
       pre_task.createJobs()
       pre_task.submit() 
       pre_task.checkIfTaskFinished()
 
-      # 
+      # Collect the data and create a pickled training set
       generateTrainingDataset(self.settings)
 
-      # Now that we have a dataset we can perform accurate trainments
+      # Now that we have a dataset we can perform training
       train_task = TrainingTask(self.settings)
       train_task.CreateJobs()
       train_task.Submit()
@@ -78,6 +82,7 @@ class System:
 
       logwrite('End of training.\n',self.settings)
 
+
    def prediction(self):
       """
       This function encapsulates all steps needed to perform a prediction. Given
@@ -87,29 +92,47 @@ class System:
 
       logwrite('Begin of prediction.\n',self.settings)
 
+      print '#'*80
+      print '\t\t\tStarting approximation...\n'
+      print '#'*80
+
       # Before creating a candidate spliced read dataset we have to first filter
       # the matches from the first seed finding run.
 
-      #approx_task = ApproximationTask(self.settings)
-      #approx_task.CreateJobs()
-      #approx_task.Submit()
-      #approx_task.CheckIfTaskFinished()
+      approx_task = ApproximationTask(self.settings)
+      approx_task.CreateJobs()
+      approx_task.Submit()
+      approx_task.CheckIfTaskFinished()
       
+      sys.exit(0) # FIXME: debug abort -- everything after this (dataset generation, alignment, postprocessing) is unreachable
+
       # After filtering combine the filtered matches from the first run and the
       # found matches from the second run to a full dataset
 
+      print '#'*80
+      print '\t\t\tStarting dataset generation...\n'
+      print '#'*80
+
       generatePredictionDataset(self.settings)
       #pre_task = PreprocessingTask(self.settings)
       #pre_task.CreateJobs()
       #pre_task.Submit() 
       #pre_task.CheckIfTaskFinished()
 
+      print '#'*80
+      print '\t\t\tStarting alignments...\n'
+      print '#'*80
+
       # Now that we have a dataset we can perform accurate alignments
       align_task = AlignmentTask(self.settings)
       align_task.CreateJobs()
       align_task.Submit()
       align_task.CheckIfTaskFinished()
 
+      print '#'*80
+      print '\t\t\tPostprocessing...\n'
+      print '#'*80
+
       # The results of the above alignment step can be converted to a data format
       # needed for further postprocessing.
       post_task = PostprocessingTask(self.settings)