+ changed parameter transfer to support array of plifs
authorfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Sat, 29 Dec 2007 20:51:41 +0000 (20:51 +0000)
committerfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Sat, 29 Dec 2007 20:51:41 +0000 (20:51 +0000)
+ the quality scores are stored in an array of plifs
TODO
- calculate feature scores for quality plifs

git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@7221 e1793c9e-67f9-0310-80fc-b846ff1f7b36

QPalmaDP/QPalmaDP.i
QPalmaDP/fill_matrix.cpp
QPalmaDP/fill_matrix.h
QPalmaDP/penalty_info.h
QPalmaDP/qpalma_dp.cpp
QPalmaDP/qpalma_dp.h
python/qpalma.py

index 73b4054..8aa2554 100644 (file)
@@ -18,6 +18,8 @@
 
 %array_class(int, intArray) ;
 %array_class(double, doubleArray) ;
+%array_functions(struct penalty_struct, penaltyArray) ;
+
 /*
 %array_functions(int, intArray) ;
 %array_functions(double, doubleArray) ;
 %pythoncode
 %{
 
+def createPenaltyArrayFromList(list):
+   array = new_penaltyArray(len(list))
+   for i in range(len(list)):
+      penaltyArray_setitem(array, i, list[i])
+   return array
+
 def createDoubleArrayFromList(list):
    #array = new_doubleArray(len(list))
    array = doubleArray(len(list))
index 9dfebc1..ff55671 100644 (file)
@@ -69,14 +69,7 @@ int check_char(char base)
  * quality score.
  */
 
-double scoreMatch(int dnaChar, int estChar, double baseScore){
-   for(int idx=0;idx<24;idx++)
-      struct penalty_struct currentPen = ::qualityScores[idx];
-
-   return 1.0;
-}
-
-double getScore(double *matchmatrix,int mlen, int dnaChar, int estChar, double baseScore, mode scoringScheme) {
+double getScore(struct penalty_struct* qualityScores, double* matchmatrix, int mlen, int dnaChar, int estChar, double baseScore, mode scoringScheme) {
    double score = 0.0;
 
    if( scoringScheme == NORMAL ) {
@@ -89,14 +82,18 @@ double getScore(double *matchmatrix,int mlen, int dnaChar, int estChar, double b
       if( estChar != -1 && dnaChar != -1 )
          score = matchmatrix[mlen* dnaChar +estChar];
    } else {
-         score = scoreMatch(dnaChar,estChar,baseScore);
+         int estIdx = check_char(estChar) - 1;
+         int dnaIdx = check_char(dnaChar) - 1;
+         // assert 0 <= estIdx <= 3
+         int rowSize = 6;
+         struct penalty_struct currentPen = qualityScores[estIdx*rowSize+dnaIdx];
+         score = 0;//scoreMatch(dnaChar,estChar,baseScore);
    }
 
    return score;
 }
 
-
-void fill_matrix(int nr_paths, Pre_score* matrices[], int est_len /*counter var: i*/, int dna_len /*counter var: j*/, char* est, char* dna, penalty_struct* functions , double* matchmatrix, double* donor, double* acceptor, bool remove_duplicate_scores, int nr_donor_sites, int* donor_sites, int* max_score_positions)
+void fill_matrix(int nr_paths, Pre_score* matrices[], int est_len /*counter var: i*/, int dna_len /*counter var: j*/, char* est, char* dna, penalty_struct* functions , double* matchmatrix, penalty_struct* qualityScores, double* donor, double* acceptor, bool remove_duplicate_scores, int nr_donor_sites, int* donor_sites, int* max_score_positions, mode currentMode)
 {      
   // printf("Entering fill_matrix...\n");
 
@@ -106,14 +103,14 @@ void fill_matrix(int nr_paths, Pre_score* matrices[], int est_len /*counter var:
   const int mlen = 6; // length of matchmatrix
   int estChar, dnaChar; // 0:'-', 1:'a', 2:'c', 3:'g', 4:'t', 5:'n'
 
+  printf("1st plif first three elems %f %f %f\n",qualityScores[0].penalties[0],qualityScores[0].penalties[1],qualityScores[0].penalties[2]);
+  printf("2nd plif first three elems %f %f %f\n",qualityScores[1].penalties[0],qualityScores[1].penalties[1],qualityScores[1].penalties[2]);
 
   double baseScore;
   double *est_scores = new double[est_len];
   for(int i=0;i<est_len;i++)
    est_scores[i] = 0.0;
 
-   mode currentMode = NORMAL;
-
 
   double prevValue ;
   double tempValue;
@@ -303,7 +300,7 @@ void fill_matrix(int nr_paths, Pre_score* matrices[], int est_len /*counter var:
        if (isfinite(prevValue))
        {
          // tempValue = prevValue +(matchmatrix[mlen* dnaChar]);   /* score(gap,DNA) */
-         tempValue = prevValue + getScore(matchmatrix,mlen,dnaChar,-1,baseScore,currentMode);
+         tempValue = prevValue + getScore(qualityScores,matchmatrix,mlen,dnaChar,-1,baseScore,currentMode);
          if (isfinite(tempValue))
          {
            assert(num_elem<max_num_elem) ;
@@ -324,7 +321,7 @@ void fill_matrix(int nr_paths, Pre_score* matrices[], int est_len /*counter var:
        if (isfinite(prevValue))
        {
          //tempValue = prevValue +(matchmatrix[mlen* dnaChar +estChar]);//DNA mit EST
-         tempValue = prevValue + getScore(matchmatrix,mlen,dnaChar,estChar,baseScore,currentMode);
+         tempValue = prevValue + getScore(qualityScores,matchmatrix,mlen,dnaChar,estChar,baseScore,currentMode);
          if (isfinite(tempValue))
          {
            assert(num_elem<max_num_elem) ;
@@ -345,7 +342,7 @@ void fill_matrix(int nr_paths, Pre_score* matrices[], int est_len /*counter var:
        if (isfinite(prevValue))
        {
          // tempValue = prevValue +(matchmatrix[estChar]); /* score(EST,gap) */
-         tempValue = prevValue + getScore(matchmatrix,mlen,-1,estChar,baseScore,currentMode);
+         tempValue = prevValue + getScore(qualityScores,matchmatrix,mlen,-1,estChar,baseScore,currentMode);
          if (isfinite(tempValue))
          {
            assert(num_elem<max_num_elem) ;
index 4c3a10e..1a0deaf 100644 (file)
@@ -39,7 +39,7 @@ typedef struct Align_pair { //24B
 };
 
 
-void fill_matrix (int nr_paths, Pre_score* matrices[], int len_est, int len_dna, char* est, char* dna, penalty_struct* functions, double* matchmatrix, double* donor, double* acceptor, bool remove_duplicate_scores, int nr_donor_sites, int* donor_sites, int* max_score_positions);
+void fill_matrix (int nr_paths, Pre_score* matrices[], int len_est, int len_dna, char* est, char* dna, penalty_struct* functions, double* matchmatrix, penalty_struct* qualityScores, double* donor, double* acceptor, bool remove_duplicate_scores, int nr_donor_sites, int* donor_sites, int* max_score_positions);
 // funktion aendern in void result_align(Pre_score* matrices[], int anzpath,
 
 int check_char(char base);
@@ -50,7 +50,6 @@ extern void print_align(Pre_score* matrix, int length_est,  int length_dna, Alig
 
 double getScore(double *matchmatrix,int mlen, int dnaChar, int estChar);
 
-enum mode { NORMAL, USE_QUALITY_SCORES};
 
 double scoreMatch(int dnaChar, int estChar, double baseScore);
 
index 44ce621..a5725b0 100644 (file)
@@ -42,4 +42,6 @@ REAL lookup_penalty(const struct penalty_struct *PEN, INT p_value,
 
 extern struct penalty_struct *qualityScores;
 
+enum mode { NORMAL, USE_QUALITY_SCORES};
+
 #endif
index 4bd4f44..0974136 100644 (file)
@@ -47,10 +47,11 @@ void Alignment::setMatchPlifs(struct penalty_struct match_plifs, int idx) {
 void Alignment::getDNAEST(){}
 
 void Alignment::myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
-      int est_len_p, struct penalty_struct h, double* matchmatrix, int mm_len,
-      double* donor, int d_len, double* acceptor, int a_len,
+      int est_len_p, double* prb, double* chastity, struct penalty_struct h, double* matchmatrix, int mm_len,
+      double* donor, int d_len, double* acceptor, int a_len, struct penalty_struct* qualityScores, 
       bool remove_duplicate_scores, bool print_matrix) {
 
+
    // printf("Entering myalign...\n");
    mlen = 6; // score matrix: length of 6 for "- A C G T N"
    dna_len =  dna_len_p + 1 ;
@@ -86,8 +87,10 @@ void Alignment::myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
   }
   
   // printf("calling fill_matrix...\n");
+  //
+   mode currentMode = NORMAL;
 
-  fill_matrix(nr_paths, matrices, est_len, dna_len, est, dna, &h, matchmatrix, donor, acceptor, remove_duplicate_scores, nr_donor_sites, donor_sites, max_score_positions);
+  fill_matrix(nr_paths, matrices, est_len, dna_len, est, dna, &h, matchmatrix, qualityScores, donor, acceptor, remove_duplicate_scores, nr_donor_sites, donor_sites, max_score_positions,currentMode);
   
   // printf("after call to fill_matrix...\n");
   /***************************************************************************/ 
@@ -203,4 +206,3 @@ void Alignment::getAlignmentResults(int* s_align, int* e_align,
    
    // printf("Leaving getAlignmentResults...\n");
 }
-
index 7f9817e..dbaf896 100644 (file)
@@ -29,7 +29,7 @@ typedef struct Align_pair { //24B
   char dna_char;
 };
 
-void fill_matrix (int nr_paths, Pre_score* matrices[], int len_est, int len_dna, char* est, char* dna, penalty_struct* functions, double* matchmatrix, double* donor, double* acceptor, bool remove_duplicate_scores, int nr_donor_sites, int* donor_sites, int* max_score_positions);
+void fill_matrix (int nr_paths, Pre_score* matrices[], int len_est, int len_dna, char* est, char* dna, penalty_struct* functions, double* matchmatrix, penalty_struct* qualityScores, double* donor, double* acceptor, bool remove_duplicate_scores, int nr_donor_sites, int* donor_sites, int* max_score_positions, mode currentMode);
 
 int check_char(char base);
 
@@ -53,6 +53,13 @@ extern void print_align(Pre_score* matrix, int length_est,  int length_dna, Alig
  * print_matrix               -> a boolean
  *
  * [SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest] = myalign_local(...
+ *
+ * the idea of the qualityScores array is as follows
+ *
+ * consider a matrix of 24 plifs
+ * 
+ * -> row major
+ *
  */
 
 class Alignment {
@@ -102,9 +109,10 @@ class Alignment {
       }
 
       void myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
-      int est_len_p, struct penalty_struct h, double* matchmatrix, int mm_len,
-      double* donor, int d_len, double* acceptor, int a_len,
-      bool remove_duplicate_scores, bool print_matrix);
+      int est_len_p, double* prb, double* chastity, struct penalty_struct h, 
+      double* matchmatrix, int mm_len, double* donor, int d_len, double* acceptor,
+      int a_len, struct penalty_struct* qualityScores, bool remove_duplicate_scores,
+      bool print_matrix);
 
       void setMatchPlifs(struct penalty_struct match_plifs,int idx);
 
index 57eccef..a411ad2 100644 (file)
@@ -3,7 +3,7 @@
 
 ###########################################################
 #
-# This file contains the 
+# 
 #
 ###########################################################
 
@@ -34,13 +34,15 @@ import Configuration
 
 
 
-def initializeQualityScoringFunctions(alignment,numPlifs,numSuppPoints):
+def initializeQualityScoringFunctions(numPlifs,numSuppPoints):
 
    min_intron_len=20
    max_intron_len=1000
    min_svm_score=-5
    max_svm_score=5
 
+   qualityPlifs = [None]*numPlifs
+
    for idx in range(numPlifs):
 
       curPlif = Plf()
@@ -54,9 +56,21 @@ def initializeQualityScoringFunctions(alignment,numPlifs,numSuppPoints):
       curPlif.use_svm   = 0 
       curPlif.next_id   = 0 
 
+      if idx == 0:
+         curPlif.penalties[0] = 11
+         curPlif.penalties[1] = 22
+         curPlif.penalties[2] = 33
+
+      if idx == 1:
+         curPlif.penalties[0] = 99
+         curPlif.penalties[1] = 100
+         curPlif.penalties[2] = 101
+
       curPlif = curPlif.convert2SWIG()
-      alignment.setMatchPlifs(curPlif,idx)
+      qualityPlifs[idx] = curPlif
 
+   qualityPlifs = QPalmaDP.createPenaltyArrayFromList(qualityPlifs)
+   return qualityPlifs
 
 class QPalma:
    """
@@ -114,7 +128,7 @@ class QPalma:
 
       # this number defines the number of support points for one tuple (a,b)
       # where 'a' comes with a quality score
-      self.numQualSuppPoints    = 30
+      self.numQualSuppPoints    = 10
       self.numQualSuppPoints    = 0
 
       self.numFeatures = self.numDonSuppPoints + self.numAccSuppPoints\
@@ -240,6 +254,9 @@ class QPalma:
             est_len = len(est)
             ps = h.convert2SWIG()
 
+            prb = QPalmaDP.createDoubleArrayFromList([.0]*est_len)
+            chastity = QPalmaDP.createDoubleArrayFromList([.0]*est_len)
+
             matchmatrix = QPalmaDP.createDoubleArrayFromList(mmatrix.flatten().tolist()[0])
             mm_len = 36
 
@@ -252,41 +269,43 @@ class QPalma:
             qualityMat = QPalmaDP.createDoubleArrayFromList(qualityMatrix)
             currentAlignment.setQualityMatrix(qualityMat,self.numQualSuppPoints)
 
-            initializeQualityScoringFunctions(currentAlignment,numPlifs,numSuppPoints)
+            qualityPlifs = initializeQualityScoringFunctions(numPlifs,numSuppPoints)
+
+            # pdb.set_trace()
 
             #print 'PYTHON: Calling myalign...'
             # calculates SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest
             currentAlignment.myalign( num_path[exampleIdx], dna, dna_len,\
-             est, est_len, ps, matchmatrix, mm_len, donor, d_len,\
-             acceptor, a_len, remove_duplicate_scores, print_matrix)
+             est, est_len, prb, chastity, ps, matchmatrix, mm_len, donor, d_len,\
+             acceptor, a_len, qualityPlifs, remove_duplicate_scores, print_matrix)
             #print 'PYTHON: After myalign call...'
 
-            newSpliceAlign       = QPalmaDP.createIntArrayFromList([0]*(dna_len*num_path[exampleIdx]))
-            newEstAlign          = QPalmaDP.createIntArrayFromList([0]*(est_len*num_path[exampleIdx]))
-            newWeightMatch       = QPalmaDP.createIntArrayFromList([0]*(mm_len*num_path[exampleIdx]))
-            newAlignmentScores   = QPalmaDP.createDoubleArrayFromList([.0]*num_path[exampleIdx])
+            c_SpliceAlign       = QPalmaDP.createIntArrayFromList([0]*(dna_len*num_path[exampleIdx]))
+            c_EstAlign          = QPalmaDP.createIntArrayFromList([0]*(est_len*num_path[exampleIdx]))
+            c_WeightMatch       = QPalmaDP.createIntArrayFromList([0]*(mm_len*num_path[exampleIdx]))
+            c_AlignmentScores   = QPalmaDP.createDoubleArrayFromList([.0]*num_path[exampleIdx])
 
-            currentAlignment.getAlignmentResults(newSpliceAlign, newEstAlign, newWeightMatch, newAlignmentScores)
+            currentAlignment.getAlignmentResults(c_SpliceAlign, c_EstAlign, c_WeightMatch, c_AlignmentScores)
             del currentAlignment
 
-            spliceAlign = zeros((num_path[exampleIdx]*dna_len,1))
-            weightMatch = zeros((num_path[exampleIdx]*mm_len,1))
+            newSpliceAlign = zeros((num_path[exampleIdx]*dna_len,1))
+            newWeightMatch = zeros((num_path[exampleIdx]*mm_len,1))
 
-            #print 'spliceAlign'
-            #for i in range(dna_len*num_path[exampleIdx]):
-            #   spliceAlign[i] = newSpliceAlign[i]
+            print 'spliceAlign'
+            for i in range(dna_len*num_path[exampleIdx]):
+               newSpliceAlign[i] = c_SpliceAlign[i]
             #   print '%f' % (spliceAlign[i])
 
-            #print 'weightMatch'
-            #for i in range(mm_len*num_path[exampleIdx]):
-            #   weightMatch[i] = newWeightMatch[i]
+            print 'weightMatch'
+            for i in range(mm_len*num_path[exampleIdx]):
+               newWeightMatch[i] = c_WeightMatch[i]
             #   print '%f' % (weightMatch[i])
 
             for i in range(num_path[exampleIdx]):
-               AlignmentScores[i+1] = newAlignmentScores[i]
+               AlignmentScores[i+1] = c_AlignmentScores[i]
 
-            spliceAlign = spliceAlign.reshape(num_path[exampleIdx],dna_len)
-            weightMatch = weightMatch.reshape(num_path[exampleIdx],mm_len)
+            newSpliceAlign = newSpliceAlign.reshape(num_path[exampleIdx],dna_len)
+            newWeightMatch = newWeightMatch.reshape(num_path[exampleIdx],mm_len)
             # Calculate weights of the respective alignments Note that we are
             # calculating n-best alignments without any hamming loss, so we
             # have to keep track which of the n-best alignments correspond to
@@ -302,15 +321,15 @@ class QPalma:
                #dna_numbers = dnaest{1,pathNr}
                #est_numbers = dnaest{2,pathNr}
 
-               weightDon, weightAcc, weightIntron = computeSpliceWeights(d, a, h, spliceAlign[pathNr,:].flatten().tolist()[0], don_supp, acc_supp)
+               weightDon, weightAcc, weightIntron = computeSpliceWeights(d, a, h, newSpliceAlign[pathNr,:].flatten().tolist()[0], don_supp, acc_supp)
 
                # sum up positionwise loss between alignments
-               for alignPosIdx in range(len(spliceAlign[pathNr,:])):
-                  if spliceAlign[pathNr,alignPosIdx] != trueSpliceAlign[alignPosIdx]:
+               for alignPosIdx in range(len(newSpliceAlign[pathNr,:])):
+                  if newSpliceAlign[pathNr,alignPosIdx] != trueSpliceAlign[alignPosIdx]:
                      path_loss[pathNr+1] += 1
 
                # Gewichte in restliche Zeilen der Matrix speichern
-               wp = numpy.vstack([weightIntron, weightDon, weightAcc, weightMatch[pathNr,:].T, qualityMatrix ])
+               wp = numpy.vstack([weightIntron, weightDon, weightAcc, newWeightMatch[pathNr,:].T, qualityMatrix ])
                allWeights[:,pathNr+1] = wp
 
                hpen = mat(h.penalties).reshape(len(h.penalties),1)