git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@8654 e1793c9e...
author raetsch <raetsch@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Fri, 18 Apr 2008 08:57:22 +0000 (08:57 +0000)
committer raetsch <raetsch@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Fri, 18 Apr 2008 08:57:22 +0000 (08:57 +0000)
scripts/PipelineHeuristic.py

index 4ea1cb9..d3ab96e 100644 (file)
@@ -313,7 +313,7 @@ class PipelineHeuristic:
       """
 
       run = self.run
-      splice_thresh = 0.01
+      splice_thresh = 0.1
       max_intron_size = 2000 
 
       id       = location['id']
@@ -356,56 +356,61 @@ class PipelineHeuristic:
       # compute dummy scores
       #IntronScore = calculatePlif(h, [math.fabs(max_acc_pos-30)])[0]
       #dummyAcceptorScore = calculatePlif(a, [max_acc_score])[0] 
-      IntronScore = calculatePlif(h, [self.intron_size])[0] - 0.5
-      dummyAcceptorScore = calculatePlif(a, [0.25])[0] 
-      dummyDonorScore = calculatePlif(d, [0.25])[0]
       
       _start = cpu()
       for (don_pos,don_score) in proximal_don:
-         # remove mismatching positions in the second exon
-         original_est_cut=''
 
-         est_ptr=0
-         dna_ptr=0
-         ptr=0 
-         while ptr<len(original_est):
+          for (acc_pos,acc_score,acc_dna) in distal_acc:
+
+              DonorScore = calculatePlif(d, [don_score])[0]
+              IntronScore = calculatePlif(h, [acc_pos-don_pos])[0] 
+              AcceptorScore = calculatePlif(a, [acc_score])[0] 
+          
+              print 'splice: ', (don_pos,don_score), (acc_pos,acc_score,acc_dna), (DonorScore,IntronScore,AcceptorScore)
+
+              # construct a new "original_est"
+              original_est_cut=''
+              
+              est_ptr=0
+              dna_ptr=0
+              ptr=0
+              acc_dna_ptr=0 
+              while ptr<len(original_est):
              
-            if original_est[ptr]=='[':
-                dnaletter=original_est[ptr+1]
-                estletter=original_est[ptr+2]
-                if dna_ptr < don_pos:
-                    original_est_cut+=original_est[ptr:ptr+4] 
-                else:
-                    #original_est_cut+=estletter # EST letter
-                    original_est_cut+=dnaletter # DNA letter
-                ptr+=4 
-            else:
-                dnaletter=original_est[ptr]
-                estletter=dnaletter
+                  if original_est[ptr]=='[':
+                      dnaletter=original_est[ptr+1]
+                      estletter=original_est[ptr+2]
+                      if dna_ptr < don_pos:
+                          original_est_cut+=original_est[ptr:ptr+4] 
+                      else:
+                          if acc_dna[acc_dna_ptr]==estletter:
+                              original_est_cut += estletter # EST letter
+                          else:
+                              original_est_cut += '['+acc_dna[acc_dna_ptr]+estletter+']' # EST letter
+                          acc_dna_ptr+=1 
+                      ptr+=4 
+                  else:
+                      dnaletter=original_est[ptr]
+                      estletter=dnaletter
                 
-                original_est_cut+=estletter # EST letter
-                ptr+=1
-
-            if estletter=='-':
-                dna_ptr+=1 
-            elif dnaletter=='-':
-                est_ptr+=1
-            else:
-                dna_ptr+=1 
-                est_ptr+=1
+                      original_est_cut+=estletter # EST letter
+                      ptr+=1
+
+                  if estletter=='-':
+                      dna_ptr+=1 
+                  elif dnaletter=='-':
+                      est_ptr+=1
+                  else:
+                      dna_ptr+=1 
+                      est_ptr+=1
                          
-         assert(dna_ptr<=len(dna))
-         assert(est_ptr<=len(est))
+              assert(dna_ptr<=len(dna))
+              assert(est_ptr<=len(est))
 
-         #print "Donor"
-         DonorScore = calculatePlif(d, [don_score])[0]
-         #print DonorScore,don_score,don_pos
+              score = computeSpliceAlignScoreWithQuality(original_est_cut, quality, qualityPlifs, run, self.currentPhi)
+              score += AcceptorScore + IntronScore + DonorScore
          
-         score = computeSpliceAlignScoreWithQuality(original_est_cut, quality, qualityPlifs, run, self.currentPhi)
-         score += dummyAcceptorScore + IntronScore + DonorScore
-         
-         #print 'diff %f,%f,%f' % ((trueWeight.T * self.currentPhi)[0,0] - score,(trueWeight.T * self.currentPhi)[0,0], score)
-         alternativeScores.append(score)
+              alternativeScores.append(score)
 
       _stop = cpu()
       self.alternativeScoresTime += _stop-_start