+ got rid of some legacy code
[qpalma.git] / qpalma / computeSpliceAlignWithQuality.py
index 6a679e5..e498098 100644 (file)
@@ -1,12 +1,17 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# Written (W) 2008 Fabio De Bona
+# Copyright (C) 2008 Max-Planck-Society
 
 import numpy
 from numpy.matlib import mat,zeros,ones,inf
 import pdb
 from Plif import Plf,base_coord,linspace
 
-import Helpers
+import sequence_utils
 
 def  computeSpliceAlignWithQuality(dna, exons, est, original_est, quality, qualityPlifs, run):
    """
@@ -48,8 +53,15 @@ def  computeSpliceAlignWithQuality(dna, exons, est, original_est, quality, quali
 
    elif exons.shape == (2,1):
       exonSize = exons[1,0] - exons[0,0]
+
+      if exons[0,0] > 0:
+         SpliceAlign.extend([4]*(exons[0,0]))
+
       SpliceAlign.extend([0]*(exonSize))
 
+      if len(dna) > exons[1,0]:
+         SpliceAlign.extend([4]*(len(dna)-exons[1,0]))
+
    else:
       pass
 
@@ -91,10 +103,10 @@ def  computeSpliceAlignWithQuality(dna, exons, est, original_est, quality, quali
          orig_pos += 1
 
    for orig_char in dna_part:
-      assert orig_char in Helpers.alphabet, pdb.set_trace()
+      assert orig_char in sequence_utils.alphabet, pdb.set_trace()
 
    for orig_char in est_part:
-      assert orig_char in Helpers.alphabet, pdb.set_trace()
+      assert orig_char in sequence_utils.alphabet, pdb.set_trace()
 
    trueWeightMatch = zeros((run['matchmatrixRows']*run['matchmatrixCols'],1)) # Scorematrix fuer Wahrheit
 
@@ -173,3 +185,77 @@ def  computeSpliceAlignWithQuality(dna, exons, est, original_est, quality, quali
    #assert dna_orig == dna_calc, pdb.set_trace()
 
    return SpliceAlign, trueWeightMatch, trueWeightQuality, dna_calc
+
+
+def  computeSpliceAlignScoreWithQuality(original_est, quality, qualityPlifs, run, weightvector):
+   """
+   """
+
+   lengthSP    = run['numLengthSuppPoints']
+   donSP       = run['numDonSuppPoints']
+   accSP       = run['numAccSuppPoints']
+   mmatrixSP   = run['matchmatrixRows']*run['matchmatrixCols']
+   numq        = run['numQualSuppPoints']*5*6
+
+   lengthP = weightvector[0:lengthSP]
+   donP = weightvector[lengthSP:lengthSP+donSP]
+   accP = weightvector[donSP+lengthSP:donSP+lengthSP+accSP]
+   mmatrixP = weightvector[accSP+donSP+lengthSP:accSP+donSP+lengthSP+mmatrixSP]
+   numqP = weightvector[accSP+donSP+lengthSP+mmatrixSP:accSP+donSP+lengthSP+mmatrixSP+numq]
+   numQualSuppPoints=run['numQualSuppPoints']
+   
+   map = {'-':0, 'a':1, 'c':2, 'g':3, 't':4, 'n':5, '[': 10, ']':11 }
+   original_est_mapped = [ map[e] for e in original_est ]
+
+   if True:
+       score=0.0
+       est_ptr=0
+       dna_ptr=0
+       ptr=0 
+       while ptr<len(original_est_mapped):
+           
+           if original_est_mapped[ptr]==10: #'[':
+               dnaletter=original_est_mapped[ptr+1]
+               estletter=original_est_mapped[ptr+2]
+               ptr+=4 
+           else:
+               dnaletter=original_est_mapped[ptr]
+               estletter=dnaletter
+               ptr+=1
+
+           if estletter==0:# '-':
+               score+= mmatrixP[dna_letter]
+           else:
+               value = quality[est_ptr]
+               cur_plf = qualityPlifs[(estletter-1)*6+dnaletter]
+               
+               Lower = 0
+               for elem in cur_plf.limits:
+                   if elem<=value:
+                       Lower+=1 
+               
+               if Lower == 0:
+                   score += numqP[(estletter-1)*6*numQualSuppPoints + dnaletter*numQualSuppPoints+0] ;
+                   
+               elif Lower == len(cur_plf.limits):
+                   score += numqP[(estletter-1)*6*numQualSuppPoints + dnaletter*numQualSuppPoints + numQualSuppPoints-1] ;
+
+               else:
+                   # because we count from 0 in python
+                   Lower -= 1
+                   Upper = Lower+1 ; # x-werte bleiben fest
+                   weightup  = 1.0*(value - cur_plf.limits[Lower]) / (cur_plf.limits[Upper] - cur_plf.limits[Lower])
+                   weightlow = 1.0*(cur_plf.limits[Upper] - value) / (cur_plf.limits[Upper] - cur_plf.limits[Lower])
+                   
+                   score += numqP[(estletter-1)*6*numQualSuppPoints + dnaletter*numQualSuppPoints+Upper]*weightup ;
+                   score += numqP[(estletter-1)*6*numQualSuppPoints + dnaletter*numQualSuppPoints+Lower]*weightlow ;
+                                  
+           if estletter==0:# '-':
+               dna_ptr+=1 
+           elif dnaletter==0:#'-':
+               est_ptr+=1
+           else:
+               dna_ptr+=1 
+               est_ptr+=1
+               
+   return score