+ changed interface for decoded plif features -> directly access double array
[qpalma.git] / python / qpalma.py
index 6aadd92..ff4fc31 100644 (file)
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#qualityquality!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
 ###########################################################
@@ -26,7 +26,6 @@ from paths_load_data_solexa import *
 
 from computeSpliceWeights import *
 from set_param_palma import *
-from computeSpliceAlign import *
 from computeSpliceAlignWithQuality import *
 from penalty_lookup_new import *
 from compute_donacc import *
@@ -120,7 +119,8 @@ class QPalma:
       donSP       = Configuration.numDonSuppPoints
       accSP       = Configuration.numAccSuppPoints
       lengthSP    = Configuration.numLengthSuppPoints
-      mmatrixSP   = Configuration.sizeMMatrix
+      mmatrixSP   = Configuration.sizeMatchmatrix[0]\
+      *Configuration.sizeMatchmatrix[1]
       numq        = Configuration.numQualSuppPoints
       totalQualSP = Configuration.totalQualSuppPoints
 
@@ -132,14 +132,15 @@ class QPalma:
          if iteration_nr == iteration_steps:
             break
 
-         #for exampleIdx in range(59,self.numExamples):
-         for exampleIdx in range(self.numExamples):
+         #for exampleIdx in range(self.numExamples):
+         for exampleIdx in range(6,11):
             if (exampleIdx%10) == 0:
                print 'Current example nr %d' % exampleIdx
 
             dna = Sequences[exampleIdx] 
             est = Ests[exampleIdx] 
             quality = Qualities[exampleIdx]
+            quality = [40]*len(quality )
             exons = Exons[exampleIdx] 
             # NoiseMatrix = Noises[exampleIdx] 
             don_supp = Donors[exampleIdx] 
@@ -152,9 +153,7 @@ class QPalma:
             trueWeightDon, trueWeightAcc, trueWeightIntron = computeSpliceWeights(d, a, h, trueSpliceAlign, don_supp, acc_supp)
 
             trueWeight = numpy.vstack([trueWeightIntron, trueWeightDon, trueWeightAcc, trueWeightMatch, trueWeightQuality])
-
-            for pos,plif in enumerate(qualityPlifs):
-               totalQualityPenalties[pos*numq:(pos+1)*numq] = mat(plif.penalties).reshape(numq,1)
+            totalQualityPenalties = param[-totalQualSP:]
 
             currentPhi[0:donSP]                                               = mat(d.penalties[:]).reshape(donSP,1)
             currentPhi[donSP:donSP+accSP]                                     = mat(a.penalties[:]).reshape(accSP,1)
@@ -206,31 +205,33 @@ class QPalma:
             a_len = len(acceptor)
             acceptor = QPalmaDP.createDoubleArrayFromList(acceptor)
 
-            currentAlignment = QPalmaDP.Alignment(Configuration.numQualSuppPoints )
+            currentAlignment = QPalmaDP.Alignment(Configuration.numQualPlifs,Configuration.numQualSuppPoints)
             c_qualityPlifs = QPalmaDP.createPenaltyArrayFromList([elem.convert2SWIG() for elem in qualityPlifs])
-            #print 'PYTHON: Calling myalign...'
+
             # calculates SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest
             currentAlignment.myalign( num_path[exampleIdx], dna, dna_len,\
              est, est_len, prb, chastity, ps, matchmatrix, mm_len, donor, d_len,\
              acceptor, a_len, c_qualityPlifs, remove_duplicate_scores, print_matrix)
-            #print 'PYTHON: After myalign call...'
+
+            print 'Python: after myalign...'
 
             c_SpliceAlign       = QPalmaDP.createIntArrayFromList([0]*(dna_len*num_path[exampleIdx]))
             c_EstAlign          = QPalmaDP.createIntArrayFromList([0]*(est_len*num_path[exampleIdx]))
             c_WeightMatch       = QPalmaDP.createIntArrayFromList([0]*(mm_len*num_path[exampleIdx]))
             c_AlignmentScores   = QPalmaDP.createDoubleArrayFromList([.0]*num_path[exampleIdx])
-
+           
             emptyPlif = Plf(Configuration.numQualSuppPoints)
             emptyPlif = emptyPlif.convert2SWIG()
-            c_qualityPlifsFeatures = QPalmaDP.createPenaltyArrayFromList([emptyPlif]*(Configuration.numQualPlifs*num_path[exampleIdx]))
+            c_qualityPlifsFeatures = QPalmaDP.createDoubleArrayFromList([.0]*(Configuration.numQualPlifs*num_path[exampleIdx]))
 
             currentAlignment.getAlignmentResults(c_SpliceAlign, c_EstAlign,\
             c_WeightMatch, c_AlignmentScores, c_qualityPlifsFeatures)
 
+            print 'Python: after getAlignmentResults...'
+
             newSpliceAlign = zeros((num_path[exampleIdx]*dna_len,1))
             newEstAlign = zeros((est_len*num_path[exampleIdx],1))
             newWeightMatch = zeros((num_path[exampleIdx]*mm_len,1))
-            newQualityPlifs = [None]*num_path[exampleIdx]*Configuration.numQualPlifs
 
             #print 'newSpliceAlign'
             for i in range(dna_len*num_path[exampleIdx]):
@@ -251,11 +252,7 @@ class QPalma:
             for i in range(num_path[exampleIdx]):
                AlignmentScores[i+1] = c_AlignmentScores[i]
 
-            #print 'newQualityPlifs'
-            for i in range(num_path[exampleIdx]*Configuration.numQualPlifs):
-               newQualityPlifs[i] = QPalmaDP.penaltyArray_getitem(c_qualityPlifsFeatures, i)
-
-            #print "Calling destructors"
+            print "Calling destructors"
 
             del c_SpliceAlign
             del c_EstAlign
@@ -281,16 +278,9 @@ class QPalma:
                weightDon, weightAcc, weightIntron = computeSpliceWeights(d, a, h, newSpliceAlign[pathNr,:].flatten().tolist()[0], don_supp, acc_supp)
 
                decodedQualityFeatures = zeros((Configuration.totalQualSuppPoints,1))
-               qidx = 0
 
-               for currentPlif in newQualityPlifs[Configuration.numQualPlifs*pathNr:Configuration.numQualPlifs*(pathNr+1)]:
-                  for tidx in range(currentPlif.len):
-                     #elem = currentPlif.penalties[tidx]
-                     elem = QPalmaDP.doubleFArray_getitem(currentPlif.penalties, tidx)
-                     #print '%f ' % elem, 
-                     decodedQualityFeatures[qidx] = elem
-                     qidx += 1
-                  #print
+               for qidx in range(Configuration.numQualPlifs*pathNr,Configuration.numQualPlifs*(pathNr+1)):
+                decodedQualityFeatures[qidx%Configuration.numQualPlifs] = c_qualityPlifsFeatures[qidx]
 
                # sum up positionwise loss between alignments
                for alignPosIdx in range(len(newSpliceAlign[pathNr,:])):
@@ -336,7 +326,7 @@ class QPalma:
                   trueWeights       = allWeights[:,0]
                   firstFalseWeights = allWeights[:,firstFalseIdx]
                   differenceVector  = firstFalseWeights - trueWeights
-                  pdb.set_trace()
+                  #pdb.set_trace()
 
                   if not __debug__:
                      const_added = solver.addConstraint(differenceVector, exampleIdx)
@@ -344,6 +334,7 @@ class QPalma:
                #
                # end of one example processing 
                #
+            del c_qualityPlifsFeatures
 
             # call solver every nth step
             if exampleIdx != 0 and exampleIdx % 3 == 0 and not __debug__:
@@ -359,8 +350,8 @@ class QPalma:
                   param[i] = w[i]
 
                [h,d,a,mmatrix,qualityPlifs] = set_param_palma(param,self.ARGS.train_with_intronlengthinformation)
+
             #   break
-         
          #break
 
          #
@@ -371,9 +362,10 @@ class QPalma:
       #
       # end of optimization 
       #  
+      print 'Training completed'
+      pdb.set_trace()
       export_param('elegans.param',h,d,a,mmatrix)
       self.logfh.close()
-      print 'Training completed'
 
 def fetch_genome_info(ginfo_filename):
    if not os.path.exists(ginfo_filename):
@@ -398,40 +390,5 @@ def fetch_genome_info(ginfo_filename):
 if __name__ == '__main__':
    qpalma = QPalma()
    qpalma.run()
-
-def initializeQualityScoringFunctions(numPlifs,numSuppPoints):
-
-   min_intron_len=20
-   max_intron_len=1000
-   min_svm_score=-5
-   max_svm_score=5
-
-   qualityPlifs = [None]*numPlifs
-
-   for idx in range(numPlifs):
-
-      curPlif = Plf()
-      curPlif.limits    = linspace(min_svm_score,max_svm_score,numSuppPoints) 
-      curPlif.penalties = [0]*numSuppPoints
-      curPlif.transform = '' 
-      curPlif.name      = '' 
-      curPlif.max_len   = 100 
-      curPlif.min_len   = -100 
-      curPlif.id        = 1 
-      curPlif.use_svm   = 0 
-      curPlif.next_id   = 0 
-
-      if idx == 0:
-         curPlif.penalties[0] = 11
-         curPlif.penalties[1] = 22
-         curPlif.penalties[2] = 33
-
-      if idx == 1:
-         curPlif.penalties[0] = 99
-         curPlif.penalties[1] = 100
-         curPlif.penalties[2] = 101
-
-      curPlif = curPlif.convert2SWIG()
-      qualityPlifs[idx] = curPlif
-
-   return qualityPlifs
+   pdb.set_trace()
+   l = [elem+1 for elem in range(44)]