+ fixed some index bugs in the weights calculation
authorfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Wed, 5 Dec 2007 17:39:01 +0000 (17:39 +0000)
committerfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Wed, 5 Dec 2007 17:39:01 +0000 (17:39 +0000)
+ fixed array bug in SWIG interface

git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@6931 e1793c9e-67f9-0310-80fc-b846ff1f7b36

python/.qpalma.py.swp
python/computeSpliceAlign.py
python/computeSpliceWeights.py
python/compute_donacc.py
python/paths_load_data.py
python/penalty_lookup_new.py
python/qpalma.py
python/set_param_palma.py

index 37f0ebb..7d97710 100644 (file)
Binary files a/python/.qpalma.py.swp and b/python/.qpalma.py.swp differ
index 6f0ad72..a6d4a49 100644 (file)
@@ -18,6 +18,8 @@ def  computeSpliceAlign(dna, exons):
    numberOfExons = (exons.shape)[0] # how many rows ?
    exonSizes = [-1]*numberOfExons
 
+   assert numberOfExons == 3
+
    for idx in range(numberOfExons):
       exonSizes[idx] = exons[idx,1] - exons[idx,0]
 
@@ -27,30 +29,19 @@ def  computeSpliceAlign(dna, exons):
    # 1:donorpos, 3:intron 2:acceptorpos, 0:exon, 4: dangling end
    SpliceAlign = []
 
-   if exons[0,0] > 1:
+   if exons[0,0] > 0:
       SpliceAlign.extend([4]*(exons[0,0]))
 
-   #for i=1:nr_exons
-   #  exon_length = exon_sizes(i); %exons(i,2) is begin of intron
-   #  SpliceAlign = [SpliceAlign, zeros(1,exon_length)] ;
-   #  if i~= nr_exons,
-   #    intron_length = exons(i+1,1) - exons(i,2) ;
-   #    SpliceAlign = [SpliceAlign, 1, ones(1,intron_length-2)*3, 2] ;
-   #  end
-   #end 
-
    for idx in range(numberOfExons):
       exonLength = exonSizes[idx]
       SpliceAlign.extend([0]*exonLength)
       
       if idx < numberOfExons-1:
          intronLength = exons[idx+1,0] - exons[idx,1]
-         SpliceAlign.extend([1])
-         SpliceAlign.extend([3]*((intronLength-2)))
-         SpliceAlign.extend([2])
-         
+         SpliceAlign.extend([1]+[3]*(intronLength-2)+[2])
+
    if len(dna) > exons[2,1]:
-      SpliceAlign.extend([4]*(len(dna)+1-exons[2,1]))
+      SpliceAlign.extend([4]*(len(dna)-exons[2,1]))
 
    assert len(SpliceAlign) == len(dna), pdb.set_trace()
 
index ba82fb5..4ffe143 100644 (file)
@@ -29,8 +29,7 @@ def calculateWeights(plf, scores):
       Lower = len([elem for elem in plf.limits if elem <= value])
       Upper = Lower+1 ; # x-werte bleiben fest
 
-      Lower = Lower - 1
-      Upper = Upper - 1
+      #print Lower,Upper,len(plf.limits)
 
       if Lower == 0:
          currentWeight[0] += 1
@@ -42,8 +41,8 @@ def calculateWeights(plf, scores):
          currentWeight[Upper] = currentWeight[Upper] + weightup
          currentWeight[Lower] = currentWeight[Lower] + weightlow
 
-      #print value,plf.limits[Lower],plf.limits[Upper]
-      #print weightup,weightlow,currentWeight[Upper],currentWeight[Lower]
+         print value,plf.limits[Lower],plf.limits[Upper]
+         print weightup,weightlow,currentWeight[Upper],currentWeight[Lower]
 
    return currentWeight
 
@@ -56,7 +55,7 @@ def computeSpliceWeights(d, a, h, SpliceAlign, don_supp, acc_supp):
    DonorScores = [elem for pos,elem in enumerate(don_supp) if SpliceAlign[pos] == 1]
    assert not ( -inf in DonorScores )
 
-   print 'donor'
+   #print 'donor'
    weightDon = calculateWeights(d,DonorScores)
 
    ####################################################################################
@@ -67,7 +66,7 @@ def computeSpliceWeights(d, a, h, SpliceAlign, don_supp, acc_supp):
    AcceptorScores = [elem for pos,elem in enumerate(acc_supp) if pos > 0 and SpliceAlign[pos-1] == 2]
    assert not ( -inf in AcceptorScores )
 
-   print 'acceptor'
+   #print 'acceptor'
    weightAcc = calculateWeights(a,AcceptorScores)
 
    ####################################################################################
index b198877..bad0872 100644 (file)
@@ -8,13 +8,12 @@ from penalty_lookup_new import *
 def compute_donacc(donor_supp, acceptor_supp, d, a):
 
    assert(len(donor_supp)==len(acceptor_supp))
+   size = len(donor_supp)
   
-   #donor = zeros((len(donor_supp),1))
-   #acceptor= zeros((len(acceptor_supp),1))
-   donor = [0.0]*len(donor_supp)
-   acceptor= [0.0]*len(acceptor_supp)
+   donor    = [0.0]*size
+   acceptor = [0.0]*size
 
-   for idx in range(len(donor_supp)):
+   for idx in range(size):
      if isinf(donor_supp[idx]):
        donor[idx] = donor_supp[idx]
      else:
index bd31cce..fef599c 100644 (file)
@@ -116,4 +116,9 @@ def paths_load_data(expt,genome_info,PAR):
    #  Exons     = TestExon ;  % exon boundaries
    #  Ests      = TestEsts ;  % est sequences
 
+   # Lower all indices by one to convert matlab 
+   # to python indices
+
+   Exons -= 1
+
    return Sequences, Acceptors, Donors, Exons, Ests, Noises
index 351ba73..a0f5d1c 100644 (file)
@@ -2,6 +2,7 @@
 # -*- coding: utf-8 -*-
 
 import math
+import pdb
 
 def penalty_lookup_new(penalty_struct, value):
 
index 51ee5c7..3277d43 100644 (file)
@@ -162,11 +162,12 @@ def run():
 
          # Berechne die Parameter des wirklichen Alignments (but with untrained d,a,h ...)    
          trueSpliceAlign, trueWeightMatch = computeSpliceAlign(dna, exons)
+         
+         pdb.set_trace()
 
          # Calculate
          trueWeightDon, trueWeightAcc, trueWeightIntron = computeSpliceWeights(d, a, h, trueSpliceAlign, don_supp, acc_supp)
 
-
          # Reshape currentW param 
          currentW = zeros((numFeatures,1))
       
@@ -177,9 +178,9 @@ def run():
          currentW[numDonSuppPoints+numAccSuppPoints+numLengthSuppPoints+sizeMMatrix:numDonSuppPoints+numAccSuppPoints+numLengthSuppPoints+sizeMMatrix+numQualSuppPoints,0] = qualityMatrix[:,0]
 
          currentPhi = zeros((numFeatures,1))
-         currentPhi[0:30]     = d.penalties[:]
-         currentPhi[30:60]    = a.penalties[:]
-         currentPhi[60:90]    = h.penalties[:]
+         currentPhi[0:30]     = mat(d.penalties[:]).reshape(30,1)
+         currentPhi[30:60]    = mat(a.penalties[:]).reshape(30,1)
+         currentPhi[60:90]    = mat(h.penalties[:]).reshape(30,1)
          currentPhi[90:126]   = mmatrix[:]
          currentPhi[126:]   = qualityMatrix[:]
 
@@ -204,38 +205,41 @@ def run():
          dna_len = len(dna)
          est_len = len(est)
 
+         ps = h.convert2SWIG()
+
          matchmatrix = QPalmaDP.createDoubleArrayFromList(mmatrix.flatten().tolist()[0])
          mm_len = 36
-         donor = QPalmaDP.createDoubleArrayFromList([1,2.0,4])
-         d_len = 3
-         acceptor = QPalmaDP.createDoubleArrayFromList([.1,2,4])
-         a_len = 3
+
+         d_len = len(donor)
+         donor = QPalmaDP.createDoubleArrayFromList(donor)
+         a_len = len(acceptor)
+         acceptor = QPalmaDP.createDoubleArrayFromList(acceptor)
+
          remove_duplicate_scores = False
          print_matrix = False
 
          currentAlignment = QPalmaDP.Alignment()
-
          qualityMat = QPalmaDP.createDoubleArrayFromList(qualityMatrix)
          currentAlignment.setQualityMatrix(qualityMat,numQualSuppPoints)
-         ps = h.convert2SWIG()
 
+         print 'PYTHON: Calling myalign...'
          # returns [SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest]
          currentAlignment.myalign( nr_paths, dna, dna_len,\
          est, est_len, ps, matchmatrix, mm_len, donor, d_len,\
          acceptor, a_len, remove_duplicate_scores, print_matrix)
 
-         newSpliceAlign       = QPalmaDP.createIntArrayFromList([0]*(dna_len*nr_paths))
-         newEstAlign          = QPalmaDP.createIntArrayFromList([0]*(est_len*nr_paths))
-         newWeightMatch       = QPalmaDP.createIntArrayFromList([0]*(mm_len*nr_paths))
-         newAlignmentScores   = QPalmaDP.createDoubleArrayFromList([.0]*nr_paths)
+         #newSpliceAlign       = QPalmaDP.createIntArrayFromList([0]*(dna_len*nr_paths))
+         #newEstAlign          = QPalmaDP.createIntArrayFromList([0]*(est_len*nr_paths))
+         #newWeightMatch       = QPalmaDP.createIntArrayFromList([0]*(mm_len*nr_paths))
+         #newAlignmentScores   = QPalmaDP.createDoubleArrayFromList([.0]*nr_paths)
 
-         currentAlignment.getAlignmentResults(newSpliceAlign, newEstAlign, newWeightMatch, newAlignmentScores)
+         #currentAlignment.getAlignmentResults(newSpliceAlign, newEstAlign, newWeightMatch, newAlignmentScores)
 
-         print newSpliceAlign
+         #print newSpliceAlign
 
          print 'after myalign call...'
 
-         if iteration_nr == 2:
+         if exampleId == 2:
             break
 
       iteration_nr += 1
index 761d7ff..e4befad 100644 (file)
@@ -49,7 +49,7 @@ class Plf: #means piecewise linear function
       ps = QPalmaDP.penalty_struct()
    
       ps.limits = QPalmaDP.createDoubleArrayFromList(self.limits)
-      ps.penalties = QPalmaDP.createDoubleArrayFromList([elem[0] for elem in self.penalties.tolist()])
+      ps.penalties = QPalmaDP.createDoubleArrayFromList(self.penalties)
 
       ps.max_len = self.max_len
       ps.min_len = self.min_len
@@ -113,7 +113,7 @@ def set_param_palma(param, train_with_intronlengthinformation,\
    # Gapfunktion
    ####################
    h.limits    = logspace(math.log(min_intron_len,10),math.log(max_intron_len,10),30) 
-   h.penalties = param[0:30] 
+   h.penalties = param[0:30].flatten().tolist()[0]
    #h.transform = '+1' 
    h.transform = '' 
    h.name      = 'h' 
@@ -128,7 +128,7 @@ def set_param_palma(param, train_with_intronlengthinformation,\
    # Donorfunktion
    ####################
    d.limits    = linspace(min_svm_score,max_svm_score,30) 
-   d.penalties = param[30:60]
+   d.penalties = param[30:60].flatten().tolist()[0]
    #d.transform = '+1' 
    d.transform = '' 
    d.name      = 'd' 
@@ -143,7 +143,7 @@ def set_param_palma(param, train_with_intronlengthinformation,\
    # Acceptorfunktion
    ####################
    a.limits    = linspace(min_svm_score,max_svm_score,30) 
-   a.penalties = param[60:90]
+   a.penalties = param[60:90].flatten().tolist()[0]
    #a.transform = '+1' 
    a.transform = '' 
    a.name      = 'a'