first version of faster heuristic
authorraetsch <raetsch@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Thu, 17 Apr 2008 19:10:14 +0000 (19:10 +0000)
committerraetsch <raetsch@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Thu, 17 Apr 2008 19:10:14 +0000 (19:10 +0000)
git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@8588 e1793c9e-67f9-0310-80fc-b846ff1f7b36

qpalma/computeSpliceAlignWithQuality.py
scripts/PipelineHeuristic.py

index 6a679e5..96b61da 100644 (file)
@@ -173,3 +173,94 @@ def  computeSpliceAlignWithQuality(dna, exons, est, original_est, quality, quali
    #assert dna_orig == dna_calc, pdb.set_trace()
 
    return SpliceAlign, trueWeightMatch, trueWeightQuality, dna_calc
    #assert dna_orig == dna_calc, pdb.set_trace()
 
    return SpliceAlign, trueWeightMatch, trueWeightQuality, dna_calc
+
+
+def  computeSpliceAlignScoreWithQuality(dna, exons, est, original_est, quality, qualityPlifs, run, weightvector):
+   """
+   """
+
+   lengthSP    = run['numLengthSuppPoints']
+   donSP       = run['numDonSuppPoints']
+   accSP       = run['numAccSuppPoints']
+   mmatrixSP   = run['matchmatrixRows']*run['matchmatrixCols']
+   numq        = run['numQualSuppPoints']
+
+   lengthP = weightvector[0:lengthSP]
+   donP = weightvector[lengthSP:lengthSP+donSP]
+   accP = weightvector[donSP+lengthSP:donSP+lengthSP+accSP]
+   mmatrixP = weightvector[accSP+donSP+lengthSP:accSP+donSP+lengthSP+mmatrixP]
+   numqP = weightvector[accSP+donSP+lengthSP+mmatrixP:accSP+donSP+lengthSP+mmatrixP+numq]
+   numQualSuppPoints=run['numQualSuppPoints']
+   
+   map = {'-':0, 'a':1, 'c':2, 'g':3, 't':4, 'n':5, '[': 10, ']':11}
+   original_est_mapped = [ map[e] for e in original_est ]
+
+   if exons.shape == (2,2):
+      numberOfExons = 2
+      exonSizes = [-1]*numberOfExons
+
+      exonSizes[0] = exons[0,1] - exons[0,0]
+      exonSizes[1] = exons[1,1] - exons[1,0]
+
+      score=0.0 
+
+   elif exons.shape == (2,1):
+       score=0.0
+       est_ptr=0
+       dna_ptr=0
+       ptr=0 
+       while ptr<len(original_est_mapped):
+           
+           if original_est[ptr]==10: #'[':
+               dnaletter=original_est_mapped[ptr+1]
+               estletter=original_est_mapped
+               ptr+=4 
+           else:
+               dnaletter=original_est[ptr+2][ptr]
+               estletter=dnaletter
+               ptr+=1
+           if est_letter=='-':
+               score+= mmatrixP[dna_letter]
+               dna_ptr+=1 
+           else:
+               est_qual = quality[est_ptr]
+
+               cur_plf = qualityPlifs[(estnum-1)*6+dnanum]
+               
+               Lower = len([elem for elem in cur_plf.limits if elem <= value])
+               
+               if Lower == 0:
+                   score += numqP[(estletter-1)*6*numQualSuppPoints + dnaletter*numQualSuppPoints+0] ;
+                   
+               elif Lower == len(cur_plf.limits):
+                   score += numqP[(estletter-1)*6*numQualSuppPoints + dnaletter*numQualSuppPoints+numQualSuppPoints-1] ;
+
+               else:
+                   # because we count from 0 in python
+                   Lower -= 1
+                   Upper = Lower+1 ; # x-werte bleiben fest
+                   #print value,Lower,Upper
+                   weightup  = 1.0*(value - cur_plf.limits[Lower]) / (cur_plf.limits[Upper] - cur_plf.limits[Lower])
+                   weightlow = 1.0*(cur_plf.limits[Upper] - value) / (cur_plf.limits[Upper] - cur_plf.limits[Lower])
+                   
+                   score += numqP[(estletter-1)*6*numQualSuppPoints + dnaletter*numQualSuppPoints+Upper]*weightup ;
+                   score += numqP[(estletter-1)*6*numQualSuppPoints + dnaletter*numQualSuppPoints+Lower]*weightlow ;
+                                  
+           if est_letter=='-':
+               dna_ptr+=1 
+           elif dna_letter=='-':
+               est_ptr+=1
+           else:
+               dna_ptr+=1 
+               est_ptr+=1
+               
+           assert(dna_ptr<len(dna))
+           assert(est_ptr<len(est))
+           
+       exonSize = exons[1,0] - exons[0,0]
+       SpliceAlign.extend([0]*(exonSize))
+
+   else:
+       assert(False)
+
+   return score
index 10b8fbe..472dd4d 100644 (file)
@@ -371,6 +371,9 @@ class PipelineHeuristic:
          computeSpliceAlignWithQuality(dna, exons, est, original_est,\
          quality, qualityPlifs, run)
 
          computeSpliceAlignWithQuality(dna, exons, est, original_est,\
          quality, qualityPlifs, run)
 
+         score = computeSpliceAlignScoreWithQuality(dna, exons, est, original_est,\
+         quality, qualityPlifs, run, self.currentPhi)
+
          stop = cpu()
          self.computeSpliceAlignWithQualityTime += stop-start
          start = cpu()
          stop = cpu()
          self.computeSpliceAlignWithQualityTime += stop-start
          start = cpu()
@@ -389,6 +392,9 @@ class PipelineHeuristic:
          # Calculate w'phi(x,y) the total score of the alignment
          alternativeScores.append((trueWeight.T * self.currentPhi)[0,0])
 
          # Calculate w'phi(x,y) the total score of the alignment
          alternativeScores.append((trueWeight.T * self.currentPhi)[0,0])
 
+         if score!=0.0:
+             print 'diff %f' % (trueWeight.T * self.currentPhi)[0,0] - score 
+
          stop = cpu()
          self.DotProdTime += stop-start
 
          stop = cpu()
          self.DotProdTime += stop-start