+ got rid of some legacy code
[qpalma.git] / qpalma / computeSpliceAlignWithQuality.py
index 96b61da..e498098 100644 (file)
@@ -1,12 +1,17 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# Written (W) 2008 Fabio De Bona
+# Copyright (C) 2008 Max-Planck-Society
 
 import numpy
 from numpy.matlib import mat,zeros,ones,inf
 import pdb
 from Plif import Plf,base_coord,linspace
 
-import Helpers
+import sequence_utils
 
 def  computeSpliceAlignWithQuality(dna, exons, est, original_est, quality, qualityPlifs, run):
    """
@@ -48,8 +53,15 @@ def  computeSpliceAlignWithQuality(dna, exons, est, original_est, quality, quali
 
    elif exons.shape == (2,1):
       exonSize = exons[1,0] - exons[0,0]
+
+      if exons[0,0] > 0:
+         SpliceAlign.extend([4]*(exons[0,0]))
+
       SpliceAlign.extend([0]*(exonSize))
 
+      if len(dna) > exons[1,0]:
+         SpliceAlign.extend([4]*(len(dna)-exons[1,0]))
+
    else:
       pass
 
@@ -91,10 +103,10 @@ def  computeSpliceAlignWithQuality(dna, exons, est, original_est, quality, quali
          orig_pos += 1
 
    for orig_char in dna_part:
-      assert orig_char in Helpers.alphabet, pdb.set_trace()
+      assert orig_char in sequence_utils.alphabet, pdb.set_trace()
 
    for orig_char in est_part:
-      assert orig_char in Helpers.alphabet, pdb.set_trace()
+      assert orig_char in sequence_utils.alphabet, pdb.set_trace()
 
    trueWeightMatch = zeros((run['matchmatrixRows']*run['matchmatrixCols'],1)) # Scorematrix fuer Wahrheit
 
@@ -175,7 +187,7 @@ def  computeSpliceAlignWithQuality(dna, exons, est, original_est, quality, quali
    return SpliceAlign, trueWeightMatch, trueWeightQuality, dna_calc
 
 
-def  computeSpliceAlignScoreWithQuality(dna, exons, est, original_est, quality, qualityPlifs, run, weightvector):
+def  computeSpliceAlignScoreWithQuality(original_est, quality, qualityPlifs, run, weightvector):
    """
    """
 
@@ -183,84 +195,67 @@ def  computeSpliceAlignScoreWithQuality(dna, exons, est, original_est, quality,
    donSP       = run['numDonSuppPoints']
    accSP       = run['numAccSuppPoints']
    mmatrixSP   = run['matchmatrixRows']*run['matchmatrixCols']
-   numq        = run['numQualSuppPoints']
+   numq        = run['numQualSuppPoints']*5*6
 
    lengthP = weightvector[0:lengthSP]
    donP = weightvector[lengthSP:lengthSP+donSP]
    accP = weightvector[donSP+lengthSP:donSP+lengthSP+accSP]
-   mmatrixP = weightvector[accSP+donSP+lengthSP:accSP+donSP+lengthSP+mmatrixP]
-   numqP = weightvector[accSP+donSP+lengthSP+mmatrixP:accSP+donSP+lengthSP+mmatrixP+numq]
+   mmatrixP = weightvector[accSP+donSP+lengthSP:accSP+donSP+lengthSP+mmatrixSP]
+   numqP = weightvector[accSP+donSP+lengthSP+mmatrixSP:accSP+donSP+lengthSP+mmatrixSP+numq]
    numQualSuppPoints=run['numQualSuppPoints']
    
-   map = {'-':0, 'a':1, 'c':2, 'g':3, 't':4, 'n':5, '[': 10, ']':11}
+   map = {'-':0, 'a':1, 'c':2, 'g':3, 't':4, 'n':5, '[': 10, ']':11 }
    original_est_mapped = [ map[e] for e in original_est ]
 
-   if exons.shape == (2,2):
-      numberOfExons = 2
-      exonSizes = [-1]*numberOfExons
-
-      exonSizes[0] = exons[0,1] - exons[0,0]
-      exonSizes[1] = exons[1,1] - exons[1,0]
-
-      score=0.0 
-
-   elif exons.shape == (2,1):
+   if True:
        score=0.0
        est_ptr=0
        dna_ptr=0
        ptr=0 
        while ptr<len(original_est_mapped):
            
-           if original_est[ptr]==10: #'[':
+           if original_est_mapped[ptr]==10: #'[':
                dnaletter=original_est_mapped[ptr+1]
-               estletter=original_est_mapped
+               estletter=original_est_mapped[ptr+2]
                ptr+=4 
            else:
-               dnaletter=original_est[ptr+2][ptr]
+               dnaletter=original_est_mapped[ptr]
                estletter=dnaletter
                ptr+=1
-           if est_letter=='-':
+
+           if estletter==0:# '-':
                score+= mmatrixP[dna_letter]
-               dna_ptr+=1 
            else:
-               est_qual = quality[est_ptr]
-
-               cur_plf = qualityPlifs[(estnum-1)*6+dnanum]
+               value = quality[est_ptr]
+               cur_plf = qualityPlifs[(estletter-1)*6+dnaletter]
                
-               Lower = len([elem for elem in cur_plf.limits if elem <= value])
+               Lower = 0
+               for elem in cur_plf.limits:
+                   if elem<=value:
+                       Lower+=1 
                
                if Lower == 0:
                    score += numqP[(estletter-1)*6*numQualSuppPoints + dnaletter*numQualSuppPoints+0] ;
                    
                elif Lower == len(cur_plf.limits):
-                   score += numqP[(estletter-1)*6*numQualSuppPoints + dnaletter*numQualSuppPoints+numQualSuppPoints-1] ;
+                   score += numqP[(estletter-1)*6*numQualSuppPoints + dnaletter*numQualSuppPoints + numQualSuppPoints-1] ;
 
                else:
                    # because we count from 0 in python
                    Lower -= 1
                    Upper = Lower+1 ; # x-werte bleiben fest
-                   #print value,Lower,Upper
                    weightup  = 1.0*(value - cur_plf.limits[Lower]) / (cur_plf.limits[Upper] - cur_plf.limits[Lower])
                    weightlow = 1.0*(cur_plf.limits[Upper] - value) / (cur_plf.limits[Upper] - cur_plf.limits[Lower])
                    
                    score += numqP[(estletter-1)*6*numQualSuppPoints + dnaletter*numQualSuppPoints+Upper]*weightup ;
                    score += numqP[(estletter-1)*6*numQualSuppPoints + dnaletter*numQualSuppPoints+Lower]*weightlow ;
                                   
-           if est_letter=='-':
+           if estletter==0:# '-':
                dna_ptr+=1 
-           elif dna_letter=='-':
+           elif dnaletter==0:#'-':
                est_ptr+=1
            else:
                dna_ptr+=1 
                est_ptr+=1
                
-           assert(dna_ptr<len(dna))
-           assert(est_ptr<len(est))
-           
-       exonSize = exons[1,0] - exons[0,0]
-       SpliceAlign.extend([0]*(exonSize))
-
-   else:
-       assert(False)
-
    return score