+ added some testcases
[qpalma.git] / scripts / PipelineHeuristic.py
index 1afcff7..e6e3a39 100644 (file)
@@ -3,11 +3,53 @@
 
 import cPickle
 import sys
-import pydb
 import pdb
 import os
 import os.path
-import math
+import resource
+
+from qpalma.computeSpliceWeights import *
+from qpalma.set_param_palma import *
+from qpalma.computeSpliceAlignWithQuality import *
+
+from numpy.matlib import mat,zeros,ones,inf
+from numpy import inf,mean
+
+from qpalma.parsers import *
+
+from ParaParser import *
+
+from qpalma.Lookup import LookupTable
+
+from qpalma.sequence_utils import reverse_complement,unbracket_seq
+
+
+class BracketWrapper:
+   #fields = ['id', 'chr', 'pos', 'strand', 'mismatches', 'length',\
+   #'offset', 'seq', 'prb', 'cal_prb', 'chastity']
+   fields = ['id', 'chr', 'pos', 'strand', 'seq', 'prb']
+
+   def __init__(self,filename):
+      self.parser = ParaParser("%lu%d%d%s%s%s",self.fields,len(self.fields),IN_VECTOR)
+      self.parser.parseFile(filename)
+
+   def __len__(self):
+      return self.parser.getSize(IN_VECTOR)
+      
+   def __getitem__(self,key):
+      return self.parser.fetchEntry(key)
+
+   def __iter__(self):
+      self.counter = 0
+      self.size = self.parser.getSize(IN_VECTOR)
+      return self
+
+   def next(self):
+      if not self.counter < self.size:
+         raise StopIteration
+      return_val = self.parser.fetchEntry(self.counter)
+      self.counter += 1
+      return return_val
 
 
 class PipelineHeuristic:
@@ -16,88 +58,601 @@ class PipelineHeuristic:
    vmatch is spliced an should be then newly aligned using QPalma or not.
    """
 
-   def __init__(self,filename):
-      self.data_filename = filename
+   def __init__(self,run_fname,data_fname,param_fname,result_fname):
+      """
+      We need a run object holding information about the nr. of support points
+      etc.
+      """
 
+      run = cPickle.load(open(run_fname))
+      self.run = run
 
-   def filter(self):
-      SeqInfo, Exons, OriginalEsts, Qualities,\
-      AlternativeSequences = paths_load_data(data_filename,'training',None,self.ARGS)
+      start = cpu()
+      # old version
+      #self.all_remapped_reads = parse_map_vm_heuristic(data_fname)
+      self.all_remapped_reads = BracketWrapper(data_fname)
+      stop = cpu()
+
+      print 'parsed %d reads in %f sec' % (len(self.all_remapped_reads),stop-start)
+
+      #g_dir    = '/fml/ag-raetsch/home/fabio/tmp/Lyrata/contigs'
+      #acc_dir  = '/fml/ag-raetsch/home/fabio/tmp/Lyrata/splice_scores/acc'
+      #don_dir  = '/fml/ag-raetsch/home/fabio/tmp/Lyrata/splice_scores/don'
+
+      #g_fmt = 'contig%d.dna.flat'
+      #s_fmt = 'contig_%d%s'
+
+      #self.chromo_range = range(0,1099)
+
+      g_dir    = '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome/'
+      acc_dir  = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/acc'
+      don_dir  = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/don'
+
+      g_fmt = 'chr%d.dna.flat'
+      s_fmt = 'contig_%d%s'
+
+      self.chromo_range = range(1,6)
+
+      start = cpu()
+      self.lt1 = LookupTable(g_dir,acc_dir,don_dir,g_fmt,s_fmt,self.chromo_range)
+      stop = cpu()
+
+      print 'prefetched sequence and splice data in %f sec' % (stop-start)
+
+      self.result_spliced_fh = open('%s.spliced'%result_fname,'w+')
+      self.result_unspliced_fh = open('%s.unspliced'%result_fname,'w+')
+
+      start = cpu()
+
+      self.data_fname = data_fname
+
+      self.param = cPickle.load(open(param_fname))
+      
+      # Set the parameters such as limits penalties for the Plifs
+      [h,d,a,mmatrix,qualityPlifs] = set_param_palma(self.param,True,run)
+
+      self.h = h
+      self.d = d
+      self.a = a
+      self.mmatrix = mmatrix
+      self.qualityPlifs = qualityPlifs
+
+      self.read_size    = 38
+
+      # parameters of the heuristics to decide whether the read is spliced
+      #self.splice_thresh      = 0.005
+      self.splice_thresh      = 0.01
+      self.max_intron_size    = 2000
+      self.max_mismatch       = 2 
+      #self.splice_stop_thresh = 0.99
+      self.splice_stop_thresh = 0.8
+      self.spliced_bias       = 0.0
+
+      param_lines = [\
+      ('%f','splice_thresh',self.splice_thresh),\
+      ('%d','max_intron_size',self.max_intron_size),\
+      ('%d','max_mismatch',self.max_mismatch),\
+      ('%f','splice_stop_thresh',self.splice_stop_thresh),\
+      ('%f','spliced_bias',self.spliced_bias)]
+
+      param_lines = [('# %s: '+form+'\n')%(name,val) for form,name,val in param_lines]
+      param_lines.append('# data: %s\n'%self.data_fname)
+      param_lines.append('# param: %s\n'%param_fname)
+
+      #pdb.set_trace()
+
+      for p_line in param_lines:
+         self.result_spliced_fh.write(p_line)
+         self.result_unspliced_fh.write(p_line)
    
-      for idx in range(len(SeqInfo)):
-         currentVMatchAlignment
-         vMatchScore = calcAlignmentScore(currentVMatchAlignment)
+      #self.original_reads = {}
+
+      # we do not have this information
+      #for line in open(reads_pipeline_fn):
+      #   line = line.strip()
+      #   id,seq,q1,q2,q3 = line.split()
+      #   id = int(id)
+      #   self.original_reads[id] = seq
+
+      lengthSP    = run['numLengthSuppPoints']
+      donSP       = run['numDonSuppPoints']
+      accSP       = run['numAccSuppPoints']
+      mmatrixSP   = run['matchmatrixRows']*run['matchmatrixCols']
+      numq        = run['numQualSuppPoints']
+      totalQualSP = run['totalQualSuppPoints']
+
+      currentPhi = zeros((run['numFeatures'],1))
+      currentPhi[0:lengthSP]                                            = mat(h.penalties[:]).reshape(lengthSP,1)
+      currentPhi[lengthSP:lengthSP+donSP]                               = mat(d.penalties[:]).reshape(donSP,1)
+      currentPhi[lengthSP+donSP:lengthSP+donSP+accSP]                   = mat(a.penalties[:]).reshape(accSP,1)
+      currentPhi[lengthSP+donSP+accSP:lengthSP+donSP+accSP+mmatrixSP]   = mmatrix[:]
+
+      totalQualityPenalties = self.param[-totalQualSP:]
+      currentPhi[lengthSP+donSP+accSP+mmatrixSP:]                    = totalQualityPenalties[:]
+      self.currentPhi = currentPhi
+
+      # we want to identify spliced reads 
+      # so true pos are spliced reads that are predicted "spliced"
+      self.true_pos  = 0
+      
+      # as false positives we count all reads that are not spliced but predicted
+      # as "spliced"
+      self.false_pos = 0
+
+      self.true_neg  = 0
+      self.false_neg = 0
+
+      # total time spend for get seq and scores
+      self.get_time  = 0.0
+      self.calcAlignmentScoreTime = 0.0
+      self.alternativeScoresTime = 0.0
+
+      self.count_time = 0.0
+      self.main_loop = 0.0
+      self.splice_site_time = 0.0
+      self.computeSpliceAlignWithQualityTime = 0.0
+      self.computeSpliceWeightsTime = 0.0
+      self.DotProdTime = 0.0
+      self.array_stuff = 0.0
+      stop = cpu()
+
+      self.init_time = stop-start
+
+   def filter(self):
+      """
+      This method...
+      """
+      run = self.run 
+
+
+      ctr = 0
+      unspliced_ctr  = 0
+      spliced_ctr    = 0
+
+      print 'Starting filtering...'
+      _start = cpu()
+
+      #for readId,currentReadLocations in all_remapped_reads.items():
+         #for location in currentReadLocations[:1]:
+
+      for location,original_line in self.all_remapped_reads:
+         
+         if ctr % 1000 == 0:
+            print ctr
+
+         id       = location['id']
+         chr      = location['chr']
+         pos      = location['pos']
+         strand   = location['strand']
+         #mismatch = location['mismatches']
+         #length   = location['length']
+         #off      = location['offset']
+         seq      = location['seq']
+         prb      = location['prb']
+         #cal_prb  = location['cal_prb']
+         #chastity = location['chastity']
+
+         id = int(id)
+
+         seq = seq.lower()
+
+         strand_map = {'D':'+', 'P':'-'}
+
+         strand = strand_map[strand]
+
+         if not chr in self.chromo_range:
+            continue
 
-         alternativeAlignments = calcAlternativeAlignments(currentVMatchAlignment)
+         unb_seq = unbracket_seq(seq)
 
-         maxScore    = 0.0
-         maxAlignment = None
-         for currentAlternative in alternativeAlignments:
-            if currentScore > maxScore:
-               maxScore = alternativeScores 
-               maxAlignment = currentAlternative
+         # forgot to do this
+         if strand == '-':
+            #pos = self.lt1.seqInfo.chromo_sizes[chr]-pos-self.read_size
+            unb_seq = reverse_complement(unb_seq)
 
-            currentScore = calcAlignmentScore(currentAlternative))
+         effective_len = len(unb_seq)
+
+         genomicSeq_start  = pos
+         #genomicSeq_stop   = pos+effective_len-1
+         genomicSeq_stop   = pos+effective_len
+
+         start = cpu()
+         #currentDNASeq, currentAcc, currentDon = get_seq_and_scores(chr,strand,genomicSeq_start,genomicSeq_stop,run['dna_flat_files'])
+         currentDNASeq, currentAcc, currentDon = self.lt1.get_seq_and_scores(chr,strand,genomicSeq_start,genomicSeq_stop,run['dna_flat_files'])
+
+         stop = cpu()
+         self.get_time += stop-start
+
+         dna            = currentDNASeq
+         exons          = zeros((2,1))
+         exons[0,0]     = 0
+         exons[1,0]     = effective_len
+         est            = unb_seq
+         original_est   = seq
+         quality        = map(lambda x:ord(x)-64,prb)
+
+         #pdb.set_trace()
+
+         currentVMatchAlignment = dna, exons, est, original_est, quality,\
+         currentAcc, currentDon
+
+         try:
+            alternativeAlignmentScores = self.calcAlternativeAlignments(location)
+         except:
+            alternativeAlignmentScores = []
+
+
+         if alternativeAlignmentScores == []:
+             # no alignment necessary
+             maxAlternativeAlignmentScore = -inf
+             vMatchScore = 0.0 
+         else:
+             maxAlternativeAlignmentScore = max(alternativeAlignmentScores)
+             # compute alignment for vmatch unspliced read
+             vMatchScore = self.calcAlignmentScore(currentVMatchAlignment)
+
+         start = cpu()
+         
+         #print 'vMatchScore/alternativeScore: %f %f ' % (vMatchScore,maxAlternativeAlignmentScore)
+         #print 'all candidates %s' % str(alternativeAlignmentScores)
+
+         new_id = id - 1000000300000
+
+         unspliced = False
+         # unspliced 
+         if new_id > 0: 
+            unspliced = True
 
          # Seems that according to our learned parameters VMatch found a good
          # alignment of the current read
-         if maxScore < vMatchScore:
-            pass
+         if maxAlternativeAlignmentScore < vMatchScore:
+            unspliced_ctr += 1
+
+            self.result_unspliced_fh.write(original_line+'\n') 
+
+            if unspliced:
+               self.true_neg += 1
+            else:
+               self.false_neg += 1
+
          # We found an alternative alignment considering splice sites that scores
          # higher than the VMatch alignment
          else:
-            pass
-            
+            spliced_ctr += 1
+
+            self.result_spliced_fh.write(original_line+'\n')
+
+            if unspliced:
+               self.false_pos += 1
+            else:
+               self.true_pos += 1
 
+         ctr += 1
+         stop = cpu()
+         self.count_time = stop-start
 
-   def calcAlternativeAlignments(self,currentVMatchAlignment):
+      _stop = cpu()
+      self.main_loop = _stop-_start
+
+      print 'Unspliced/Splice: %d %d'%(unspliced_ctr,spliced_ctr)
+      print 'True pos / false pos : %d %d'%(self.true_pos,self.false_pos)
+      print 'True neg / false neg : %d %d'%(self.true_neg,self.false_neg)
+
+
+   def findHighestScoringSpliceSites(self, currentAcc, currentDon, DNA, max_intron_size, read_size, splice_thresh):
+
+      def signum(a):
+          if a>0: 
+              return 1
+         elif a<0:
+              return -1
+          else:
+              return 0
+
+      proximal_acc   = []
+      for idx in xrange(max_intron_size, max_intron_size+read_size/2):
+          if currentAcc[idx]>= splice_thresh:
+            proximal_acc.append((idx,currentAcc[idx]))
+
+      proximal_acc.sort(lambda x,y: signum(x[1]-y[1])) 
+      proximal_acc=proximal_acc[-2:]
+
+      distal_acc   = []
+      for idx in xrange(max_intron_size+read_size, len(currentAcc)):
+          if currentAcc[idx]>= splice_thresh and idx+read_size<len(currentAcc):
+            distal_acc.append((idx, currentAcc[idx], DNA[idx+1:idx+read_size]))
+
+      #distal_acc.sort(lambda x,y: signum(x[1]-y[1])) 
+      #distal_acc=distal_acc[-2:]
+
+
+      proximal_don   = []
+      for idx in xrange(max_intron_size+read_size/2, max_intron_size+read_size):
+         if currentDon[idx] >= splice_thresh:
+            proximal_don.append((idx, currentDon[idx]))
+
+      proximal_don.sort(lambda x,y: signum(x[1]-y[1]))
+      proximal_don=proximal_don[-2:]
+
+      distal_don   = []
+      for idx in xrange(1, max_intron_size):
+         if currentDon[idx] >= splice_thresh and idx>read_size:
+            distal_don.append((idx, currentDon[idx], DNA[idx-read_size:idx]))
+
+      distal_don.sort(lambda x,y: y[0]-x[0])
+      #distal_don=distal_don[-2:]
+
+      return proximal_acc,proximal_don,distal_acc,distal_don
+
+   def calcAlternativeAlignments(self,location):
       """
       Given an alignment proposed by Vmatch this function calculates possible
       alternative alignments taking into account for example matched
       donor/acceptor positions.
       """
+
+      run = self.run
+
+      id       = location['id']
+      chr      = location['chr']
+      pos      = location['pos']
+      strand   = location['strand']
+      original_est = location['seq']
+      quality      = location['prb']
+      quality        = map(lambda x:ord(x)-64,quality)
+      #cal_prb  = location['cal_prb']
       
-      currentVMatchAlignment
+      original_est = original_est.lower()
+      est = unbracket_seq(original_est)
+      effective_len = len(est)
 
+      genomicSeq_start  = pos - self.max_intron_size
+      genomicSeq_stop   = pos + self.max_intron_size + len(est)
 
-      return alternativeAlignments 
+      strand_map = {'D':'+', 'P':'-'}
+      strand = strand_map[strand]
 
+      start = cpu()
+      #currentDNASeq, currentAcc, currentDon = get_seq_and_scores(chr, strand, genomicSeq_start, genomicSeq_stop, run['dna_flat_files'])
+      currentDNASeq, currentAcc, currentDon = self.lt1.get_seq_and_scores(chr, strand, genomicSeq_start, genomicSeq_stop, run['dna_flat_files'])
+      stop = cpu()
+      self.get_time += stop-start
+      dna   = currentDNASeq
+   
+      proximal_acc,proximal_don,distal_acc,distal_don = self.findHighestScoringSpliceSites(currentAcc, currentDon, dna, self.max_intron_size, len(est), self.splice_thresh)
+       
+      alternativeScores = []
+      
+      # inlined
+      h = self.h
+      d = self.d
+      a = self.a
+      mmatrix = self.mmatrix
+      qualityPlifs = self.qualityPlifs
+      # inlined
+
+      # find an intron on the 3' end
+      _start = cpu()
+      for (don_pos,don_score) in proximal_don:
+          DonorScore = calculatePlif(d, [don_score])[0]
+
+          for (acc_pos,acc_score,acc_dna) in distal_acc:
+
+              IntronScore = calculatePlif(h, [acc_pos-don_pos])[0] 
+              AcceptorScore = calculatePlif(a, [acc_score])[0] 
+          
+              #print 'don splice: ', (don_pos,don_score), (acc_pos,acc_score,acc_dna), (DonorScore,IntronScore,AcceptorScore)
+
+              # construct a new "original_est"
+              original_est_cut=''
+              
+              est_ptr=0
+              dna_ptr=self.max_intron_size
+              ptr=0
+              acc_dna_ptr=0
+              num_mismatch = 0
+              
+              while ptr<len(original_est):
+                  #print acc_dna_ptr,len(acc_dna),acc_pos,don_pos
+                  
+                  if original_est[ptr]=='[':
+                      dnaletter=original_est[ptr+1]
+                      estletter=original_est[ptr+2]
+                      if dna_ptr < don_pos:
+                          original_est_cut+=original_est[ptr:ptr+4]
+                          num_mismatch += 1
+                      else:
+                          if acc_dna[acc_dna_ptr]==estletter:
+                              original_est_cut += estletter # EST letter
+                          else:
+                              original_est_cut += '['+acc_dna[acc_dna_ptr]+estletter+']' # EST letter
+                              num_mismatch += 1
+                              #print '['+acc_dna[acc_dna_ptr]+estletter+']' 
+                          acc_dna_ptr+=1 
+                      ptr+=4 
+                  else:
+                      dnaletter=original_est[ptr]
+                      estletter=dnaletter
+                
+                      if dna_ptr < don_pos:
+                          original_est_cut+=estletter # EST letter
+                      else:
+                          if acc_dna[acc_dna_ptr]==estletter:
+                              original_est_cut += estletter # EST letter
+                          else:
+                              num_mismatch += 1
+                              original_est_cut += '['+acc_dna[acc_dna_ptr]+estletter+']' # EST letter
+                              #print '('+acc_dna[acc_dna_ptr]+estletter+')' 
+                          acc_dna_ptr+=1 
+                          
+                      ptr+=1
+
+                  if estletter=='-':
+                      dna_ptr+=1 
+                  elif dnaletter=='-':
+                      est_ptr+=1
+                  else:
+                      dna_ptr+=1 
+                      est_ptr+=1
+              if num_mismatch>self.max_mismatch:
+                  continue
+                         
+              assert(dna_ptr<=len(dna))
+              assert(est_ptr<=len(est))
+
+              #print original_est, original_est_cut              
+
+              score = computeSpliceAlignScoreWithQuality(original_est_cut, quality, qualityPlifs, run, self.currentPhi)
+              score += AcceptorScore + IntronScore + DonorScore + self.spliced_bias 
+         
+              alternativeScores.append(score)
+
+              if acc_score>=self.splice_stop_thresh:
+                  break
+              
+      _stop = cpu()
+      self.alternativeScoresTime += _stop-_start
+
+      # find an intron on the 5' end 
+      _start = cpu()
+      for (acc_pos,acc_score) in proximal_acc:
+
+          AcceptorScore = calculatePlif(a, [acc_score])[0]
+          
+          for (don_pos,don_score,don_dna) in distal_don:
+
+              DonorScore = calculatePlif(d, [don_score])[0]
+              IntronScore = calculatePlif(h, [acc_pos-don_pos])[0] 
+          
+              #print 'acc splice: ', (don_pos,don_score,don_dna), (acc_pos,acc_score), (DonorScore,IntronScore,AcceptorScore)
+
+              # construct a new "original_est"
+              original_est_cut=''
+              
+              est_ptr=0
+              dna_ptr=self.max_intron_size
+              ptr=0
+              num_mismatch = 0
+              don_dna_ptr=len(don_dna)-(acc_pos-self.max_intron_size)-1
+              while ptr<len(original_est):
+                  
+                  if original_est[ptr]=='[':
+                      dnaletter=original_est[ptr+1]
+                      estletter=original_est[ptr+2]
+                      if dna_ptr > acc_pos:
+                          original_est_cut+=original_est[ptr:ptr+4] 
+                          num_mismatch += 1
+                      else:
+                          if don_dna[don_dna_ptr]==estletter:
+                              original_est_cut += estletter # EST letter
+                          else:
+                              original_est_cut += '['+don_dna[don_dna_ptr]+estletter+']' # EST letter
+                              num_mismatch += 1
+                              #print '['+don_dna[don_dna_ptr]+estletter+']' 
+                          don_dna_ptr+=1 
+                      ptr+=4 
+                  else:
+                      dnaletter=original_est[ptr]
+                      estletter=dnaletter
+                
+                      if dna_ptr > acc_pos:
+                          original_est_cut+=estletter # EST letter
+                      else:
+                          if don_dna[don_dna_ptr]==estletter:
+                              original_est_cut += estletter # EST letter
+                          else:
+                              original_est_cut += '['+don_dna[don_dna_ptr]+estletter+']' # EST letter
+                              num_mismatch += 1
+                              #print '('+don_dna[don_dna_ptr]+estletter+')' 
+                          don_dna_ptr+=1 
+                          
+                      ptr+=1
+
+                  if estletter=='-':
+                      dna_ptr+=1 
+                  elif dnaletter=='-':
+                      est_ptr+=1
+                  else:
+                      dna_ptr+=1 
+                      est_ptr+=1
+                         
+              if num_mismatch>self.max_mismatch:
+                  continue
+              assert(dna_ptr<=len(dna))
+              assert(est_ptr<=len(est))
+
+              #print original_est, original_est_cut              
+
+              score = computeSpliceAlignScoreWithQuality(original_est_cut, quality, qualityPlifs, run, self.currentPhi)
+              score += AcceptorScore + IntronScore + DonorScore + self.spliced_bias
+         
+              alternativeScores.append(score)
 
-   def calcAlignmentScore(self,dna,exons,est):
+              if don_score>=self.splice_stop_thresh:
+                  break
+              
+      _stop = cpu()
+      self.alternativeScoresTime += _stop-_start
+
+      return alternativeScores
+
+
+   def calcAlignmentScore(self,alignment):
       """
       Given an alignment (dna,exons,est) and the current parameter for QPalma
       this function calculates the dot product of the feature representation of
       the alignment and the parameter vector i.e the alignment score. 
       """
 
-      currentPhi = zeros((run['numFeatures'],1))
-
-      # Berechne die Parameter des wirklichen Alignments (but with untrained d,a,h ...)    
-      trueSpliceAlign, trueWeightMatch, trueWeightQuality ,dna_calc =\
-      computeSpliceAlignWithQuality(dna, exons, est, original_est,\
-      quality, qualityPlifs, run)
+      start = cpu()
+      run = self.run
 
-      # Calculate the weights
-      trueWeightDon, trueWeightAcc, trueWeightIntron =\
-      computeSpliceWeights(d, a, h, trueSpliceAlign, don_supp, acc_supp)
-      trueWeight = numpy.vstack([trueWeightIntron, trueWeightDon, trueWeightAcc, trueWeightMatch, trueWeightQuality])
+      # Lets start calculation
+      dna, exons, est, original_est, quality, acc_supp, don_supp = alignment
 
-      currentPhi[0:lengthSP]                                            = mat(h.penalties[:]).reshape(lengthSP,1)
-      currentPhi[lengthSP:lengthSP+donSP]                               = mat(d.penalties[:]).reshape(donSP,1)
-      currentPhi[lengthSP+donSP:lengthSP+donSP+accSP]                   = mat(a.penalties[:]).reshape(accSP,1)
-      currentPhi[lengthSP+donSP+accSP:lengthSP+donSP+accSP+mmatrixSP]   = mmatrix[:]
+      score = computeSpliceAlignScoreWithQuality(original_est, quality, self.qualityPlifs, run, self.currentPhi)
 
-      if run['mode'] == 'using_quality_scores':
-         totalQualityPenalties = param[-totalQualSP:]
-         currentPhi[lengthSP+donSP+accSP+mmatrixSP:]                    = totalQualityPenalties[:]
+      stop = cpu()
+      self.calcAlignmentScoreTime += stop-start
+      
+      return score 
 
-      # Calculate w'phi(x,y) the total score of the alignment
-      return (trueWeight.T * currentPhi)[0,0]
 
+def cpu():
+   return (resource.getrusage(resource.RUSAGE_SELF).ru_utime+\
+   resource.getrusage(resource.RUSAGE_SELF).ru_stime) 
 
 
 if __name__ == '__main__':
-   filename = sys.argv[1]
-   out_filename = sys.argv[2]
-   ph1 = PipelineHeuristic(filename)
+   if len(sys.argv) != 6:
+      print 'Usage: ./%s data param run spliced.results unspliced.results' % (sys.argv[0])
+      sys.exit(1)
+
+   data_fname = sys.argv[1]
+   param_fname = sys.argv[2]
+   run_fname = sys.argv[3]
+
+   result_spliced_fname    = sys.argv[4]
+   result_unspliced_fname  = sys.argv[5]
+
+   jp = os.path.join
+
+   ph1 = PipelineHeuristic(run_fname,data_fname,param_fname,result_spliced_fname,result_unspliced_fname)
+
+   start = cpu()
    ph1.filter()
+   stop = cpu()
+   print 'total time elapsed: %f' % (stop-start)
+
+   print 'time spend for get seq: %f' % ph1.get_time
+   print 'time spend for calcAlignmentScoreTime: %f' %  ph1.calcAlignmentScoreTime
+   print 'time spend for alternativeScoresTime: %f' % ph1.alternativeScoresTime
+   print 'time spend for count time: %f' % ph1.count_time
+   print 'time spend for init time: %f' % ph1.init_time
+   print 'time spend for main_loop time: %f' % ph1.main_loop
+   print 'time spend for splice_site_time time: %f' % ph1.splice_site_time
+
+   print 'time spend for computeSpliceAlignWithQualityTime time: %f'% ph1.computeSpliceAlignWithQualityTime
+   print 'time spend for computeSpliceWeightsTime time: %f'% ph1.computeSpliceWeightsTime
+   print 'time spend for DotProdTime time: %f'% ph1.DotProdTime
+   print 'time spend forarray_stuff time: %f'% ph1.array_stuff