+ changed interface for decoded plif features -> directly access double array
authorfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Mon, 14 Jan 2008 17:17:44 +0000 (17:17 +0000)
committerfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Mon, 14 Jan 2008 17:17:44 +0000 (17:17 +0000)
+ using now a 5x6 matrix of plifs for scoring / features
TODO
- find annoying double free problem of python/swig interface

git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@7351 e1793c9e-67f9-0310-80fc-b846ff1f7b36

QPalmaDP/qpalma_dp.cpp
QPalmaDP/qpalma_dp.h
QPalmaDP/result_align.cpp
python/Configuration.py
python/computeSpliceAlignWithQuality.py
python/qpalma.py
python/set_param_palma.py

index b9cd6da..c2af278 100644 (file)
@@ -6,7 +6,7 @@ using namespace std;
   myalign([nr_paths], dna, est, {h}, matchmatrix, donor, acceptor, remove_duplicate_scores, ...
   print_matrix) */
 
-Alignment::Alignment(int numq) {
+Alignment::Alignment(int numQPlifs, int numq) {
       len = 0;
       limits = 0;
       penalties = 0;
@@ -23,15 +23,9 @@ Alignment::Alignment(int numq) {
       mmatrix_param = 0;
       alignmentscores = 0;
       qualityScoresAllPaths = 0;
-      numQualSuppPoints = numq;
-}
 
-void Alignment::setQualityMatrix(double* qMat, int length){
-   if(qualityMatrix == 0)
-      qualityMatrix = new double[length];
-
-   for(int i=0; i<length; i++)
-      qualityMatrix[i] = qMat[i];
+      numQualSuppPoints = numq;
+      totalNumPlifs = numQPlifs;
 }
 
 void Alignment::getDNAEST(){}
@@ -50,6 +44,10 @@ void Alignment::myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
   /***************************************************************************/ 
   // initialize alignment matrices and call fill_matrix()  
   /***************************************************************************/
+
+    // dnaest
+   double *DNA_ARRAY = 0;
+   double *EST_ARRAY = 0;
   
   //possible donor positions
   int nr_donor_sites = 0 ;
@@ -94,7 +92,7 @@ void Alignment::myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
    est_align_size = (est_len-1)*nr_paths;
    mmatrix_param_size = (mlen*mlen)*nr_paths;
    alignmentscores_size = nr_paths; //alignment score for each path/matrix
-   qScores_size = 25*nr_paths; //alignment score for each path/matrix
+   qScores_size = totalNumPlifs*nr_paths; //alignment score for each path/matrix
 
    //printf("dna_len is %d est_len is %d mmatrix_len is %d",splice_align_size, est_align_size, mmatrix_param_size);
 
@@ -104,13 +102,14 @@ void Alignment::myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
   alignmentscores = new double[nr_paths]; //alignment score for each path/matrix
   qualityScoresAllPaths = new penalty_struct[qScores_size];
   int qidx, pidx;
-  for(qidx=0;qidx<25*nr_paths;qidx++) {
+
+  for(qidx=0;qidx<totalNumPlifs*nr_paths;qidx++) {
    penalty_struct p;
    init_penalty_struct(p);
    p.len = numQualSuppPoints;
    p.limits = (REAL*) calloc(p.len,sizeof(REAL));
    for(pidx=0;pidx<p.len;pidx++)
-      p.limits[pidx] = qualityScores[qidx%25].limits[pidx];
+      p.limits[pidx] = qualityScores[qidx%totalNumPlifs].limits[pidx];
 
    p.penalties = (REAL*) calloc(p.len,sizeof(REAL));
 
@@ -132,9 +131,6 @@ void Alignment::myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
   memset(alignmentscores, -1, nr_paths*sizeof(double)); //fills alignmentscores with zeros
   // printf("after memset...\n");
   
-    // dnaest
-   double *DNA_ARRAY = 0;
-   double *EST_ARRAY = 0;
 
   for (int z=0; z<nr_paths; z++) {
     result_length = 0 ;
@@ -142,90 +138,81 @@ void Alignment::myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
     int* s_align = splice_align + (dna_len-1)*z;  //pointer
     int* e_align = est_align + (est_len-1)*z ;    //pointer
     int* mparam  = mmatrix_param + (mlen*mlen)*z; //pointer
-    penalty_struct* qparam = qualityScoresAllPaths + (25*z);
+    penalty_struct* qparam = qualityScoresAllPaths + (totalNumPlifs*z);
 
     printf("before call to result_align...\n");
-    bool no_more_path = result_align(matrices, z, est_len, dna_len, &result_length, est, dna, prb, chastity, s_align, e_align, mparam, alignmentscores, max_score_positions, qparam);
+    bool no_more_path = result_align(matrices, z, est_len, dna_len, &result_length, est, dna, prb, chastity, s_align, e_align, mparam, alignmentscores, max_score_positions, qparam, currentMode);
     printf("after call to result_align...\n");
 
-  int qidx;
-  for(qidx=0;qidx<25*nr_paths;qidx++) {
-      penalty_struct p;
-      p = qualityScoresAllPaths[qidx];
-      int pidx;
-     for(pidx=0;pidx<p.len;pidx++)
-         printf("%f ",p.penalties[pidx]);
-      printf("\n");
+  for(qidx=0;qidx<totalNumPlifs*nr_paths;qidx++) {
+   penalty_struct p;
+   p = qualityScoresAllPaths[qidx];
+   for(pidx=0;pidx<p.len;pidx++)
+      printf("%f ",p.penalties[pidx]);
+   printf("\n");
   }
 
+ printf("after printf...\n");
 
-   if(DNA_ARRAY != 0) {
-       delete[] DNA_ARRAY;
-       delete[] EST_ARRAY;
-    }
-
-   DNA_ARRAY = new double[result_length];
-   EST_ARRAY = new double[result_length];
+   //if(DNA_ARRAY != 0) {
+   //    delete[] DNA_ARRAY;
+   //    delete[] EST_ARRAY;
+   // }
 
-    //backtracking
-    int i = max_score_positions[2*z] ; //i (est)
-    int j = max_score_positions[2*z +1] ; //j (dna)
-    int prev_i = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_i; 
-    int prev_j = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_j;
-    int prev_m_no = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_matrix_no;
-    
-    for (int ii=result_length; ii>0; ii--) {
-      if ((prev_i == (i-1)) && (prev_j == (j-1))) { // match or mismatch
-       DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
-       EST_ARRAY[ii-1] = check_char(est[i-1]) ;
-      }
-      else if ((prev_i == (i)) && (prev_j == (j-1))) {// gap on EST_ARRAY
-       DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
-       EST_ARRAY[ii-1] = 0 ; //gap
-      }
-      else if ((prev_i == (i-1)) && (prev_j == (j))) { // gap on DNA_ARRAY
-       DNA_ARRAY[ii-1] = 0 ; //gap
-       EST_ARRAY[ii-1] = check_char(est[i-1]) ;
-      }
-      else {// splice site
-       for (j; j > prev_j; j--) {
-         DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
-         EST_ARRAY[ii-1] = 6 ; //intron
-         ii-- ;
-       }
-       ii++ ; // last ii-- too much (because done in for-loop) 
-      }
-      
-      i = prev_i;
-      j = prev_j;    
-      prev_i = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_i; 
-      prev_j = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_j; 
-      prev_m_no = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_matrix_no;
-    }
-    
-  } //end of z
-
-   // printf("Content of qualityScoresAllPaths\n");
-   // for(int iii=0;iii<qScores_size;iii++) {
-   //    for(int jjj=0;jjj<qualityScoresAllPaths[iii].len;jjj++) {
-   //       printf("%f ",qualityScoresAllPaths[iii].penalties[jjj]);
-   //    }
-   //    printf("\n");
+   //DNA_ARRAY = new double[result_length];
+   //EST_ARRAY = new double[result_length];
+
+   // //backtracking
+   // int i = max_score_positions[2*z] ; //i (est)
+   // int j = max_score_positions[2*z +1] ; //j (dna)
+   // int prev_i = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_i; 
+   // int prev_j = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_j;
+   // int prev_m_no = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_matrix_no;
+   // 
+   // for (int ii=result_length; ii>0; ii--) {
+   //   if ((prev_i == (i-1)) && (prev_j == (j-1))) { // match or mismatch
+       //DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
+       //EST_ARRAY[ii-1] = check_char(est[i-1]) ;
+   //   }
+   //   else if ((prev_i == (i)) && (prev_j == (j-1))) {// gap on EST_ARRAY
+       //DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
+       //EST_ARRAY[ii-1] = 0 ; //gap
+   //   }
+   //   else if ((prev_i == (i-1)) && (prev_j == (j))) { // gap on DNA_ARRAY
+       //DNA_ARRAY[ii-1] = 0 ; //gap
+       //EST_ARRAY[ii-1] = check_char(est[i-1]) ;
+   //   }
+   //   else {// splice site
+       //for (j; j > prev_j; j--) {
+       //  DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
+       //  EST_ARRAY[ii-1] = 6 ; //intron
+       //  ii-- ;
+       //}
+       //ii++ ; // last ii-- too much (because done in for-loop)       
+   //   }
+   //   
+   //   i = prev_i;
+   //   j = prev_j;    
+   //   prev_i = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_i; 
+   //   prev_j = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_j; 
+   //   prev_m_no = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_matrix_no;
    // }
+   // 
+   } //end of z
 
-   if(DNA_ARRAY != 0) {
-    delete[] DNA_ARRAY;
-    delete[] EST_ARRAY;
-    }
+   //if(DNA_ARRAY != 0) {
+   // delete[] DNA_ARRAY;
+   // delete[] EST_ARRAY;
+   // }
 
    for (int i=nr_paths-1;i>=0; i--)
       delete[] matrices[i];
 
-   //printf("Leaving myalign...\n");
+   printf("Leaving myalign...\n");
 }
 
 void Alignment::getAlignmentResults(int* s_align, int* e_align,
-      int* mmatrix_p, double* alignscores, penalty_struct* qScores) {
+      int* mmatrix_p, double* alignscores, double* qScores) {
 
    //printf("Entering getAlignmentResults...\n");
 
@@ -242,10 +229,17 @@ void Alignment::getAlignmentResults(int* s_align, int* e_align,
    for(idx=0; idx<alignmentscores_size; idx++)
       alignscores[idx] = alignmentscores[idx];
    
+   penalty_struct currentPlif;
+   int pidx;
+   int ctr=0;
    for(idx=0; idx<qScores_size; idx++) {
-      init_penalty_struct(qScores[idx]);
-      qScores[idx] = qualityScoresAllPaths[idx];
-      copy_penalty_struct(&qualityScoresAllPaths[idx],&qScores[idx]);
+      currentPlif = qualityScoresAllPaths[idx];
+      for(pidx=0; pidx<currentPlif.len; pidx++) {
+         qScores[ctr] = currentPlif.penalties[pidx];
+         ctr++;
+      }
    }
+
+   //printf("ctr is %d\n",ctr);
    //printf("Leaving getAlignmentResults...\n");
 }
index d1307d3..d42d084 100644 (file)
@@ -33,7 +33,7 @@ void fill_matrix (int nr_paths, Pre_score* matrices[], int len_est, int len_dna,
 
 int check_char(char base);
 
-bool result_align (Pre_score* matrices[], int matrixnr, int i,  int j, int* result_length_ptr, char* est, char* dna, double* prb, double* chastity,  int* s_align, int* e_align, int* m_param, double* alignmentscores, int* max_score_positions, penalty_struct* qparam);
+bool result_align (Pre_score* matrices[], int matrixnr, int i,  int j, int* result_length_ptr, char* est, char* dna, double* prb, double* chastity,  int* s_align, int* e_align, int* m_param, double* alignmentscores, int* max_score_positions, penalty_struct* qparam, mode currentMode);
 
 extern void print_align(Pre_score* matrix, int length_est,  int length_dna, Align_pair* vektor, int result_length, int print_matrix);
 
@@ -69,7 +69,7 @@ class Alignment {
       int* est_align;
       int* mmatrix_param;
       double* alignmentscores;
-      double* qualityMatrix;
+      struct penalty_struct* qualityScoresAllPaths;
 
       int dna_len;
       int est_len;
@@ -77,6 +77,7 @@ class Alignment {
       int nr_paths;
 
       int numQualSuppPoints;
+      int totalNumPlifs;
 
       uint splice_align_size ;
       uint est_align_size ;
@@ -84,8 +85,6 @@ class Alignment {
       uint alignmentscores_size ;
       uint qScores_size ;
 
-      struct penalty_struct* qualityScoresAllPaths;
-
       INT len;
       REAL *limits;
       REAL *penalties;
@@ -98,7 +97,7 @@ class Alignment {
       INT use_svm;
 
    public:
-      Alignment(int numq);
+      Alignment(int numQPlifs ,int numq);
       ~Alignment() {
          if(splice_align != 0)
             delete[] splice_align;
@@ -111,6 +110,9 @@ class Alignment {
 
          if(alignmentscores != 0)
             delete[] alignmentscores;
+
+         //if(qualityScoresAllPaths != 0)
+         //   delete[] qualityScoresAllPaths;
       }
 
       void myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
@@ -119,17 +121,9 @@ class Alignment {
       int a_len, struct penalty_struct* qualityScores, bool remove_duplicate_scores,
       bool print_matrix);
 
-      void penSetLength(int l) { len = l; }
-      void penSetLimits(REAL* lts) { 
-         limits = new REAL[len];
-         for(int i=0; i<len; i++)
-            limits[i] = lts[i];
-      }
-      
-      void setQualityMatrix(double* qMat, int length);
       void getDNAEST();
       void getAlignmentResults(int* s_align, int* e_align,
-      int* mmatrix_p, double* alignscores, penalty_struct* qScores);
+      int* mmatrix_p, double* alignscores, double* qScores);
 };
 
 #endif  // _QPALMA_DP_H_
index 4891d15..20dd4eb 100644 (file)
@@ -5,18 +5,8 @@ using namespace std;
 
 void increaseFeatureCount(penalty_struct* qparam, int dnanum, int estnum, double estprb) {
 
-   if(dnanum == 0) {
-      //printf("Zero entry\n");
-      return;
-   }
-
-   if(estnum == 0) { // we have a gap
-      //printf("Zero entry\n");
-      return;
-   }
-
-   penalty_struct currentStruct = qparam[(estnum-1)*5+(dnanum-1)];
-   printf("Current index %d\n",(estnum-1)*5+(dnanum-1));
+   penalty_struct currentStruct = qparam[(estnum-1)*6+(dnanum)];
+   printf("Current index %d dna/est: %d %d\n",(estnum-1)*6+(dnanum),dnanum,estnum);
 
       printf("before\n");
       int p_idx;
@@ -66,7 +56,7 @@ void increaseFeatureCount(penalty_struct* qparam, int dnanum, int estnum, double
    printf("\n");
 }
 
-bool result_align(Pre_score* matrices[], int z, int est_len, int dna_len, int* result_length_ptr, char* est, char* dna, double* prb, double* chastity, int* s_align, int* e_align, int* mparam, double* alignmentscores, int* max_score_positions, penalty_struct* qparam)
+bool result_align(Pre_score* matrices[], int z, int est_len, int dna_len, int* result_length_ptr, char* est, char* dna, double* prb, double* chastity, int* s_align, int* e_align, int* mparam, double* alignmentscores, int* max_score_positions, penalty_struct* qparam, mode currentMode)
 //Gibt 1 zurueck, wenn kein weiterer Pfad existiert, sonst 0
 
 {  
@@ -143,9 +133,12 @@ bool result_align(Pre_score* matrices[], int z, int est_len, int dna_len, int* r
 
       prbnum = prb[i-1];
       chastitynum = chastity[i-1];
-      increaseFeatureCount(qparam,dnanum,estnum,prbnum);
 
-      mparam[mlen*dnanum +estnum] ++ ;
+      if(currentMode == USE_QUALITY_SCORES)
+         increaseFeatureCount(qparam,dnanum,estnum,prbnum);
+      else
+         mparam[mlen*dnanum+estnum] ++ ;
+
     }
     else if ((prev_i == (i)) && (prev_j == (j-1))) {// gap on EST
       (*result_length_ptr)= (*result_length_ptr) + 1;
@@ -168,9 +161,11 @@ bool result_align(Pre_score* matrices[], int z, int est_len, int dna_len, int* r
 
       prbnum = prb[i-1];
       chastitynum = chastity[i-1];
-      increaseFeatureCount(qparam,dnanum,estnum,prbnum);
 
-      mparam[mlen*dnanum +estnum] ++ ;
+      if(currentMode == USE_QUALITY_SCORES)
+         increaseFeatureCount(qparam,dnanum,estnum,prbnum);
+      else
+         mparam[mlen*dnanum +estnum] ++ ;
     }
     else {// splice site
       (*result_length_ptr) =  (*result_length_ptr) + (j - prev_j);
@@ -228,5 +223,3 @@ bool result_align(Pre_score* matrices[], int z, int est_len, int dna_len, int* r
 
   return 0;
 }
-
-
index 419b0be..554ec24 100644 (file)
@@ -54,6 +54,7 @@ fixedParam = numpy.matlib.mat(
        [ 0.08188783], [ 0.54884803], [ 0.84039558], [ 0.6982093 ], [ 0.41686176],
        [ 0.38568873], [ 0.29401347], [ 0.12704074], [ 0.30640858], [ 0.89578031],
        [ 0.84621571], [ 0.11783439], [ 0.0944695 ], [ 0.34081575], [ 0.44157643],
+       [ 0.84621571], [ 0.11783439], [ 0.0944695 ], [ 0.34081575], [ 0.44157643],
        [ 0.77847185], [ 0.04283567], [ 0.45107823], [ 0.89789891], [ 0.41045519],
        [ 0.49073531], [ 0.29727627], [ 0.94711483], [ 0.24898204], [ 0.26181212],
        [ 0.71760957], [ 0.60326883], [ 0.80887576], [ 0.09448718], [ 0.88064525],
@@ -79,7 +80,7 @@ fixedParam = numpy.matlib.mat(
 #
 #
 
-C = 1.0
+C = 100.0
 
 # 'normal' means work like Palma 'using_quality_scores' means work like Palma
 # plus using sequencing quality scores
@@ -92,7 +93,9 @@ mode = 'using_quality_scores'
 
 # When using quality scores our scoring function is defined as
 #
-# f: S x R x S -> R
+# f: S_e x R x S -> R
+#
+# where S_e is {A,C,G,T,N} and S = {A,C,G,T,N,-}
 # 
 # as opposed to a usage without quality scores when we only have
 # 
@@ -103,19 +106,22 @@ numAccSuppPoints     = 30
 numLengthSuppPoints  = 30 
 numQualSuppPoints    = 4
 
+min_qual = -1
+max_qual = 40
+
 if mode == 'normal':
-   sizeMMatrix          = 36
+   sizeMatchmatrix   = (6,6)
    numQualPlifs = 0
 elif mode == 'using_quality_scores':
-   sizeMMatrix          = 36 
-   numQualPlifs = 5*5
+   sizeMatchmatrix   = (6,6)
+   numQualPlifs = 5*5 #5*6
 else:
    assert False, 'Wrong operation mode specified'
 
 totalQualSuppPoints = numQualPlifs*numQualSuppPoints
 
 numFeatures = numDonSuppPoints + numAccSuppPoints\
-+ numLengthSuppPoints + sizeMMatrix + totalQualSuppPoints 
++ numLengthSuppPoints + sizeMatchmatrix[0]*sizeMatchmatrix[1] + totalQualSuppPoints 
 
 iter_steps = 2
 remove_duplicate_scores = False
index 0ba16e2..2cfc5f5 100644 (file)
@@ -25,8 +25,6 @@ def  computeSpliceAlignWithQuality(dna, exons):
    for idx in range(numberOfExons):
       exonSizes[idx] = exons[idx,1] - exons[idx,0]
 
-   sizeMatchmatrix = 6 # -acgtn
-
    # SpliceAlign vector describes alignment: 
    # 1:donorpos, 3:intron 2:acceptorpos, 0:exon, 4: dangling end
    SpliceAlign = []
@@ -59,13 +57,14 @@ def  computeSpliceAlignWithQuality(dna, exons):
 
    assert totalESTLength == len(est) 
 
+   sizeMatchmatrix = Configuration.sizeMatchmatrix
    # counts the occurences of a,c,g,t,n in this order
-   numChar = [0]*sizeMatchmatrix
+   numChar = [0]*sizeMatchmatrix[0]
 
    # counts the occurrences of a,c,g,t,n with their quality scores
    trueWeightQuality = zeros((Configuration.numQualPlifs*Configuration.numQualSuppPoints,1))
 
-   for pos, elem in enumerate(est):
+   for elem in est:
       if elem == 'a':
          numChar[0] += 1
       if elem == 'c':
@@ -80,15 +79,18 @@ def  computeSpliceAlignWithQuality(dna, exons):
       trueWeightQuality[base_coord(elem,elem)*Configuration.numQualSuppPoints+Configuration.numQualSuppPoints-1] += 1.0
 
    totalNumChar = 0
-   for idx in range(sizeMatchmatrix):
+   for idx in range(sizeMatchmatrix[0]):
       totalNumChar += numChar[idx]
 
    assert totalNumChar == len(est), "numChar %d, len(est) %d" % (totalNumChar,len(est))
 
    # writing in weight match matrix
    # matrix is saved columnwise
-   trueWeightMatch = zeros((sizeMatchmatrix*sizeMatchmatrix,1)) # Scorematrix fuer Wahrheit
-   for idx in range(1,sizeMatchmatrix):
-      trueWeightMatch[(sizeMatchmatrix+1)*idx] = numChar[idx-1]
+   print numChar
+   trueWeightMatch = zeros((sizeMatchmatrix[0],sizeMatchmatrix[1])) # Scorematrix fuer Wahrheit
+   for idx in range(sizeMatchmatrix[0]-1):
+      trueWeightMatch[idx,idx] = numChar[idx]
+
+   trueWeightMatch = trueWeightMatch.reshape(sizeMatchmatrix[0]*sizeMatchmatrix[1],1)
 
    return SpliceAlign, trueWeightMatch, trueWeightQuality
index 6aadd92..ff4fc31 100644 (file)
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#qualityquality!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
 ###########################################################
@@ -26,7 +26,6 @@ from paths_load_data_solexa import *
 
 from computeSpliceWeights import *
 from set_param_palma import *
-from computeSpliceAlign import *
 from computeSpliceAlignWithQuality import *
 from penalty_lookup_new import *
 from compute_donacc import *
@@ -120,7 +119,8 @@ class QPalma:
       donSP       = Configuration.numDonSuppPoints
       accSP       = Configuration.numAccSuppPoints
       lengthSP    = Configuration.numLengthSuppPoints
-      mmatrixSP   = Configuration.sizeMMatrix
+      mmatrixSP   = Configuration.sizeMatchmatrix[0]\
+      *Configuration.sizeMatchmatrix[1]
       numq        = Configuration.numQualSuppPoints
       totalQualSP = Configuration.totalQualSuppPoints
 
@@ -132,14 +132,15 @@ class QPalma:
          if iteration_nr == iteration_steps:
             break
 
-         #for exampleIdx in range(59,self.numExamples):
-         for exampleIdx in range(self.numExamples):
+         #for exampleIdx in range(self.numExamples):
+         for exampleIdx in range(6,11):
             if (exampleIdx%10) == 0:
                print 'Current example nr %d' % exampleIdx
 
             dna = Sequences[exampleIdx] 
             est = Ests[exampleIdx] 
             quality = Qualities[exampleIdx]
+            quality = [40]*len(quality )
             exons = Exons[exampleIdx] 
             # NoiseMatrix = Noises[exampleIdx] 
             don_supp = Donors[exampleIdx] 
@@ -152,9 +153,7 @@ class QPalma:
             trueWeightDon, trueWeightAcc, trueWeightIntron = computeSpliceWeights(d, a, h, trueSpliceAlign, don_supp, acc_supp)
 
             trueWeight = numpy.vstack([trueWeightIntron, trueWeightDon, trueWeightAcc, trueWeightMatch, trueWeightQuality])
-
-            for pos,plif in enumerate(qualityPlifs):
-               totalQualityPenalties[pos*numq:(pos+1)*numq] = mat(plif.penalties).reshape(numq,1)
+            totalQualityPenalties = param[-totalQualSP:]
 
             currentPhi[0:donSP]                                               = mat(d.penalties[:]).reshape(donSP,1)
             currentPhi[donSP:donSP+accSP]                                     = mat(a.penalties[:]).reshape(accSP,1)
@@ -206,31 +205,33 @@ class QPalma:
             a_len = len(acceptor)
             acceptor = QPalmaDP.createDoubleArrayFromList(acceptor)
 
-            currentAlignment = QPalmaDP.Alignment(Configuration.numQualSuppPoints )
+            currentAlignment = QPalmaDP.Alignment(Configuration.numQualPlifs,Configuration.numQualSuppPoints)
             c_qualityPlifs = QPalmaDP.createPenaltyArrayFromList([elem.convert2SWIG() for elem in qualityPlifs])
-            #print 'PYTHON: Calling myalign...'
+
             # calculates SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest
             currentAlignment.myalign( num_path[exampleIdx], dna, dna_len,\
              est, est_len, prb, chastity, ps, matchmatrix, mm_len, donor, d_len,\
              acceptor, a_len, c_qualityPlifs, remove_duplicate_scores, print_matrix)
-            #print 'PYTHON: After myalign call...'
+
+            print 'Python: after myalign...'
 
             c_SpliceAlign       = QPalmaDP.createIntArrayFromList([0]*(dna_len*num_path[exampleIdx]))
             c_EstAlign          = QPalmaDP.createIntArrayFromList([0]*(est_len*num_path[exampleIdx]))
             c_WeightMatch       = QPalmaDP.createIntArrayFromList([0]*(mm_len*num_path[exampleIdx]))
             c_AlignmentScores   = QPalmaDP.createDoubleArrayFromList([.0]*num_path[exampleIdx])
-
+           
             emptyPlif = Plf(Configuration.numQualSuppPoints)
             emptyPlif = emptyPlif.convert2SWIG()
-            c_qualityPlifsFeatures = QPalmaDP.createPenaltyArrayFromList([emptyPlif]*(Configuration.numQualPlifs*num_path[exampleIdx]))
+            c_qualityPlifsFeatures = QPalmaDP.createDoubleArrayFromList([.0]*(Configuration.numQualPlifs*num_path[exampleIdx]))
 
             currentAlignment.getAlignmentResults(c_SpliceAlign, c_EstAlign,\
             c_WeightMatch, c_AlignmentScores, c_qualityPlifsFeatures)
 
+            print 'Python: after getAlignmentResults...'
+
             newSpliceAlign = zeros((num_path[exampleIdx]*dna_len,1))
             newEstAlign = zeros((est_len*num_path[exampleIdx],1))
             newWeightMatch = zeros((num_path[exampleIdx]*mm_len,1))
-            newQualityPlifs = [None]*num_path[exampleIdx]*Configuration.numQualPlifs
 
             #print 'newSpliceAlign'
             for i in range(dna_len*num_path[exampleIdx]):
@@ -251,11 +252,7 @@ class QPalma:
             for i in range(num_path[exampleIdx]):
                AlignmentScores[i+1] = c_AlignmentScores[i]
 
-            #print 'newQualityPlifs'
-            for i in range(num_path[exampleIdx]*Configuration.numQualPlifs):
-               newQualityPlifs[i] = QPalmaDP.penaltyArray_getitem(c_qualityPlifsFeatures, i)
-
-            #print "Calling destructors"
+            print "Calling destructors"
 
             del c_SpliceAlign
             del c_EstAlign
@@ -281,16 +278,9 @@ class QPalma:
                weightDon, weightAcc, weightIntron = computeSpliceWeights(d, a, h, newSpliceAlign[pathNr,:].flatten().tolist()[0], don_supp, acc_supp)
 
                decodedQualityFeatures = zeros((Configuration.totalQualSuppPoints,1))
-               qidx = 0
 
-               for currentPlif in newQualityPlifs[Configuration.numQualPlifs*pathNr:Configuration.numQualPlifs*(pathNr+1)]:
-                  for tidx in range(currentPlif.len):
-                     #elem = currentPlif.penalties[tidx]
-                     elem = QPalmaDP.doubleFArray_getitem(currentPlif.penalties, tidx)
-                     #print '%f ' % elem, 
-                     decodedQualityFeatures[qidx] = elem
-                     qidx += 1
-                  #print
+               for qidx in range(Configuration.numQualPlifs*pathNr,Configuration.numQualPlifs*(pathNr+1)):
+                decodedQualityFeatures[qidx%Configuration.numQualPlifs] = c_qualityPlifsFeatures[qidx]
 
                # sum up positionwise loss between alignments
                for alignPosIdx in range(len(newSpliceAlign[pathNr,:])):
@@ -336,7 +326,7 @@ class QPalma:
                   trueWeights       = allWeights[:,0]
                   firstFalseWeights = allWeights[:,firstFalseIdx]
                   differenceVector  = firstFalseWeights - trueWeights
-                  pdb.set_trace()
+                  #pdb.set_trace()
 
                   if not __debug__:
                      const_added = solver.addConstraint(differenceVector, exampleIdx)
@@ -344,6 +334,7 @@ class QPalma:
                #
                # end of one example processing 
                #
+            del c_qualityPlifsFeatures
 
             # call solver every nth step
             if exampleIdx != 0 and exampleIdx % 3 == 0 and not __debug__:
@@ -359,8 +350,8 @@ class QPalma:
                   param[i] = w[i]
 
                [h,d,a,mmatrix,qualityPlifs] = set_param_palma(param,self.ARGS.train_with_intronlengthinformation)
+
             #   break
-         
          #break
 
          #
@@ -371,9 +362,10 @@ class QPalma:
       #
       # end of optimization 
       #  
+      print 'Training completed'
+      pdb.set_trace()
       export_param('elegans.param',h,d,a,mmatrix)
       self.logfh.close()
-      print 'Training completed'
 
 def fetch_genome_info(ginfo_filename):
    if not os.path.exists(ginfo_filename):
@@ -398,40 +390,5 @@ def fetch_genome_info(ginfo_filename):
 if __name__ == '__main__':
    qpalma = QPalma()
    qpalma.run()
-
-def initializeQualityScoringFunctions(numPlifs,numSuppPoints):
-
-   min_intron_len=20
-   max_intron_len=1000
-   min_svm_score=-5
-   max_svm_score=5
-
-   qualityPlifs = [None]*numPlifs
-
-   for idx in range(numPlifs):
-
-      curPlif = Plf()
-      curPlif.limits    = linspace(min_svm_score,max_svm_score,numSuppPoints) 
-      curPlif.penalties = [0]*numSuppPoints
-      curPlif.transform = '' 
-      curPlif.name      = '' 
-      curPlif.max_len   = 100 
-      curPlif.min_len   = -100 
-      curPlif.id        = 1 
-      curPlif.use_svm   = 0 
-      curPlif.next_id   = 0 
-
-      if idx == 0:
-         curPlif.penalties[0] = 11
-         curPlif.penalties[1] = 22
-         curPlif.penalties[2] = 33
-
-      if idx == 1:
-         curPlif.penalties[0] = 99
-         curPlif.penalties[1] = 100
-         curPlif.penalties[2] = 101
-
-      curPlif = curPlif.convert2SWIG()
-      qualityPlifs[idx] = curPlif
-
-   return qualityPlifs
+   pdb.set_trace()
+   l = [elem+1 for elem in range(44)]
index 2859d2d..694eb2e 100644 (file)
@@ -62,7 +62,8 @@ def set_param_palma(param, train_with_intronlengthinformation,\
    donSP       = Configuration.numDonSuppPoints
    accSP       = Configuration.numAccSuppPoints
    lengthSP    = Configuration.numLengthSuppPoints
-   mmatrixSP   = Configuration.sizeMMatrix
+   mmatrixSP   = Configuration.sizeMatchmatrix[0]\
+   *Configuration.sizeMatchmatrix[1]
    totalQualSP = Configuration.totalQualSuppPoints
 
    ####################
@@ -120,15 +121,15 @@ def set_param_palma(param, train_with_intronlengthinformation,\
    ####################
    for idx in range(Configuration.numQualPlifs):
       currentPlif = Plf()
-      currentPlif.limits    = linspace(min_svm_score,max_svm_score,Configuration.numQualSuppPoints) 
+      currentPlif.limits    = linspace(Configuration.min_qual,Configuration.max_qual,Configuration.numQualSuppPoints) 
       begin                 = lengthSP+donSP+accSP+mmatrixSP+(idx*Configuration.numQualSuppPoints)
       end                   = lengthSP+donSP+accSP+mmatrixSP+((idx+1)*Configuration.numQualSuppPoints)
+      print begin,end
       currentPlif.penalties = param[begin:end].flatten().tolist()[0]
       currentPlif.transform = '' 
       currentPlif.name      = 'q' 
-      currentPlif.max_len   = 100  
-      currentPlif.min_len   = -100  
-
+      currentPlif.max_len   = Configuration.max_qual 
+      currentPlif.min_len   = Configuration.min_qual
       qualityPlifs[idx] = currentPlif
 
    return h,d,a,mmatrix,qualityPlifs