+ fixed matchmatrix for the quality case to be a 1x6 matrix
authorfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Wed, 16 Jan 2008 16:10:26 +0000 (16:10 +0000)
committerfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Wed, 16 Jan 2008 16:10:26 +0000 (16:10 +0000)
+ cleaned up the code a bit

git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@7401 e1793c9e-67f9-0310-80fc-b846ff1f7b36

QPalmaDP/fill_matrix.cpp
QPalmaDP/qpalma_dp.cpp
QPalmaDP/result_align.cpp
python/Configuration.py
python/computeSpliceAlignWithQuality.py
python/qpalma.py
python/set_param_palma.py
tools/plot_param.py

index 01615e8..6771f72 100644 (file)
@@ -84,9 +84,9 @@ double getScore(struct penalty_struct* qualityScores, double* matchmatrix, int m
       //printf("Starting scoring / scheme is USE_QUALITY_SCORES\n");
       //printf("estChar,rowSize,dnaChar are: %d,%d,%d",estChar,rowSize,dnaChar);
       if( estChar == 0 ) { // gap at est
-         score = matchmatrix[mlen*dnaChar+estChar];
+         score = matchmatrix[mlen*dnaChar];
       } else {
-         int currentPos = (estChar-1)*6+(dnaChar);
+         int currentPos = (estChar-1)*6+dnaChar;
          struct penalty_struct currentPen = qualityScores[currentPos];
 
          if(baseScore <= currentPen.limits[0]) {
@@ -351,10 +351,8 @@ void fill_matrix(int nr_paths, Pre_score* matrices[], int est_len /*counter var:
            scores[num_elem]=-tempValue ;
            num_elem++ ;
          }
-         
        }
 
-
        /***************************************/
        // 4. all possible splice sites/introns
        /***************************************/
@@ -397,8 +395,6 @@ void fill_matrix(int nr_paths, Pre_score* matrices[], int est_len /*counter var:
            }
          }
 
-         
-
           //all introns of length <= max_len
           for (int k=start_splice_pos; k < nr_donor_sites; k++) 
          //for (int k=0; k < nr_donor_sites; k++) //also intron of length greater than 1000
@@ -458,8 +454,6 @@ void fill_matrix(int nr_paths, Pre_score* matrices[], int est_len /*counter var:
     } //end of j
   } // end of i
 
-
-
   //free memory
   delete[] array;
   delete[] scores;
index 50d6b9b..fbad55b 100644 (file)
@@ -77,7 +77,6 @@ void Alignment::myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
     }  
   }
 
-
   int* max_score_positions = new int[nr_paths*2];
 
   Pre_score** matrices = new Pre_score*[nr_paths];
@@ -86,51 +85,43 @@ void Alignment::myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
   }
   
   // printf("calling fill_matrix...\n");
-  //
-
   fill_matrix(nr_paths, matrices, est_len, dna_len, est, dna, prb, &h, matchmatrix, qualityScores, donor, acceptor, remove_duplicate_scores, nr_donor_sites, donor_sites, max_score_positions,currentMode);
-  
   //printf("after call to fill_matrix...\n");
+  
   /***************************************************************************/ 
   // return arguments etc.
   /***************************************************************************/
   int result_length; //Eine Variable fuer alle Matrizen
 
-  // printf("before init of splice_align and rest...\n");
-  //
    splice_align_size = (dna_len-1)*nr_paths;
    est_align_size = (est_len-1)*nr_paths;
+
+   //if (currentMode == USE_QUALITY_SCORES)
+   //   mmatrix_param_size = mlen*nr_paths;
+
+   //if (currentMode == NORMAL)
+
    mmatrix_param_size = (mlen*mlen)*nr_paths;
+
    alignmentscores_size = nr_paths; //alignment score for each path/matrix
    qScores_size = totalNumPlifs*nr_paths; //alignment score for each path/matrix
 
-   //printf("dna_len is %d est_len is %d mmatrix_len is %d",splice_align_size, est_align_size, mmatrix_param_size);
-
   splice_align = new int[splice_align_size];
   est_align = new int[est_align_size];
   mmatrix_param = new int[mmatrix_param_size];
   alignmentscores = new double[nr_paths]; //alignment score for each path/matrix
   qualityScoresAllPaths = new penalty_struct[qScores_size];
-  int qidx, pidx;
 
+  int qidx, pidx;
   for(qidx=0;qidx<totalNumPlifs*nr_paths;qidx++) {
    penalty_struct p;
-   init_penalty_struct(p);
+   //init_penalty_struct(p);
    p.len = numQualSuppPoints;
    p.limits = (REAL*) calloc(p.len,sizeof(REAL));
    for(pidx=0;pidx<p.len;pidx++)
       p.limits[pidx] = qualityScores[qidx%totalNumPlifs].limits[pidx];
 
    p.penalties = (REAL*) calloc(p.len,sizeof(REAL));
-
-   //for(pidx=0;pidx<p.len;pidx++)
-   //   printf("%f ",p.limits[pidx]);
-   //printf("\n");
-
-   //for(pidx=0;pidx<p.len;pidx++)
-   //   printf("%f ",p.penalties[pidx]);
-   //printf("\n");
-
    qualityScoresAllPaths[qidx] = p;
   }
 
@@ -140,7 +131,6 @@ void Alignment::myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
   memset((char*)mmatrix_param, 0, mlen*mlen*nr_paths*sizeof(int)); //fills mmatrix_param with zeros
   memset(alignmentscores, -1, nr_paths*sizeof(double)); //fills alignmentscores with zeros
   //printf("after memset...\n");
-  
 
   for (int z=0; z<nr_paths; z++) {
     result_length = 0 ;
@@ -154,13 +144,13 @@ void Alignment::myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
     bool no_more_path = result_align(matrices, z, est_len, dna_len, &result_length, est, dna, prb, chastity, s_align, e_align, mparam, alignmentscores, max_score_positions, qparam, currentMode);
     //printf("after call to result_align...\n");
 
-  //for(qidx=0;qidx<totalNumPlifs*nr_paths;qidx++) {
-  // penalty_struct p;
-  // p = qualityScoresAllPaths[qidx];
-  // for(pidx=0;pidx<p.len;pidx++)
-  //    printf("%f ",p.penalties[pidx]);
-  // printf("\n");
-  //}
+   //for(qidx=0;qidx<totalNumPlifs*nr_paths;qidx++) {
+   //   penalty_struct p;
+   //   p = qualityScoresAllPaths[qidx];
+   //   for(pidx=0;pidx<p.len;pidx++)
+   //      printf("%f ",p.penalties[pidx]);
+   //   printf("\n");
+   //}
 
    //if(DNA_ARRAY != 0) {
    //    delete[] DNA_ARRAY;
@@ -215,15 +205,11 @@ void Alignment::myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
 
    for (int i=nr_paths-1;i>=0; i--)
       delete[] matrices[i];
-
-   //printf("Leaving myalign...\n");
 }
 
 void Alignment::getAlignmentResults(int* s_align, int* e_align,
       int* mmatrix_p, double* alignscores, double* qScores) {
 
-   //printf("Entering getAlignmentResults...\n");
-
    int idx;
    for(idx=0; idx<splice_align_size; idx++)
       s_align[idx] = splice_align[idx];
@@ -231,6 +217,8 @@ void Alignment::getAlignmentResults(int* s_align, int* e_align,
    for(idx=0; idx<est_align_size; idx++)
       e_align[idx] =  est_align[idx];
 
+   mmatrix_param_size = 6;
+
    for(idx=0; idx<mmatrix_param_size; idx++)
       mmatrix_p[idx] = mmatrix_param[idx];
 
@@ -249,6 +237,6 @@ void Alignment::getAlignmentResults(int* s_align, int* e_align,
       }
    }
 
-   //printf("ctr is %d\n",ctr);
+   //printf("\nctr is %d\n",ctr);
    //printf("Leaving getAlignmentResults...\n");
 }
index eabb365..89fc371 100644 (file)
@@ -9,7 +9,7 @@ void increaseFeatureCount(penalty_struct* qparam, int dnanum, int estnum, double
    FA(estnum > 0);
    FA(dnanum > -1);
 
-   int currentPos = (estnum-1)*6+(dnanum);
+   int currentPos = (estnum-1)*6+dnanum;
    penalty_struct currentStruct = qparam[currentPos];
 
    //printf("Current index %d est/dna: %d %d\n",currentPos,estnum,dnanum);
@@ -37,11 +37,13 @@ void increaseFeatureCount(penalty_struct* qparam, int dnanum, int estnum, double
 
    if (Lower == 0) {
          currentStruct.penalties[0] += 1;
+         qparam[currentPos] = currentStruct;
          return;
    }
 
-   if (Lower == (currentStruct.len-1)) {
+   if (Lower == (currentStruct.len)) {
          currentStruct.penalties[currentStruct.len-1] += 1;
+         qparam[currentPos] = currentStruct;
          return;
    }
 
@@ -62,8 +64,6 @@ void increaseFeatureCount(penalty_struct* qparam, int dnanum, int estnum, double
    //for(p_idx=0;p_idx<currentStruct.len;p_idx++)
    //   printf("%f ",currentStruct.penalties[p_idx]);
    //printf("\n");
-   //
-   
    qparam[currentPos] = currentStruct;
 }
 
@@ -105,7 +105,7 @@ bool result_align(Pre_score* matrices[], int z, int est_len, int dna_len, int* r
   int prev_j    = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_j;
   int prev_m_no = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_matrix_no;
 
-  printf("Current z is %d\n",z);
+  //printf("Current z is %d\n",z);
 
   if(!(isfinite(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value)))
   {
index 033cf11..1a0cf49 100644 (file)
@@ -159,7 +159,7 @@ if mode == 'normal':
    sizeMatchmatrix   = (6,6)
    numQualPlifs = 0
 elif mode == 'using_quality_scores':
-   sizeMatchmatrix   = (6,6)
+   sizeMatchmatrix   = (6,1)
    estPlifs = 5
    dnaPlifs = 6
    numQualPlifs = estPlifs*dnaPlifs
index 1e2f77d..5108adb 100644 (file)
@@ -95,9 +95,15 @@ def  computeSpliceAlignWithQuality(dna, exons):
 
    # writing in weight match matrix
    # matrix is saved columnwise
-   trueWeightMatch = zeros((sizeMatchmatrix[0],sizeMatchmatrix[1])) # Scorematrix fuer Wahrheit
-   for idx in range(1,sizeMatchmatrix[0]):
-      trueWeightMatch[idx,idx] = numChar[idx-1]
+   if Configuration.mode == 'normal':
+      trueWeightMatch = zeros((sizeMatchmatrix[0],sizeMatchmatrix[1])) # Scorematrix fuer Wahrheit
+      for idx in range(1,sizeMatchmatrix[0]):
+         trueWeightMatch[idx,idx] = numChar[idx-1]
+
+   if Configuration.mode == 'using_quality_scores':
+      trueWeightMatch = zeros((sizeMatchmatrix[0],sizeMatchmatrix[1])) # Scorematrix fuer Wahrheit
+      for idx in range(1,sizeMatchmatrix[0]):
+         trueWeightMatch[idx] = numChar[idx-1]
 
    trueWeightMatch = trueWeightMatch.reshape(sizeMatchmatrix[0]*sizeMatchmatrix[1],1)
 
index 42939ef..7f78dba 100644 (file)
@@ -211,13 +211,18 @@ class QPalma:
             est = str(est)
             dna_len = len(dna)
             est_len = len(est)
+
+            #dna ='n'*dna_len
+            #est ='g'*est_len
+            #quality = [40]*len(quality)
+         
             ps = h.convert2SWIG()
 
             prb = QPalmaDP.createDoubleArrayFromList(quality)
             chastity = QPalmaDP.createDoubleArrayFromList([.0]*est_len)
 
             matchmatrix = QPalmaDP.createDoubleArrayFromList(mmatrix.flatten().tolist()[0])
-            mm_len = 36
+            mm_len = Configuration.sizeMatchmatrix[0]*Configuration.sizeMatchmatrix[1]
 
             d_len = len(donor)
             donor = QPalmaDP.createDoubleArrayFromList(donor)
@@ -240,7 +245,7 @@ class QPalma:
             c_WeightMatch       = QPalmaDP.createIntArrayFromList([0]*(mm_len*num_path[exampleIdx]))
             c_AlignmentScores   = QPalmaDP.createDoubleArrayFromList([.0]*num_path[exampleIdx])
 
-            c_qualityPlifsFeatures = QPalmaDP.createDoubleArrayFromList([.0]*(Configuration.numQualSuppPoints*Configuration.numQualPlifs*num_path[exampleIdx]))
+            c_qualityPlifsFeatures = QPalmaDP.createDoubleArrayFromList([.0]*(Configuration.totalQualSuppPoints*num_path[exampleIdx]))
 
             currentAlignment.getAlignmentResults(c_SpliceAlign, c_EstAlign,\
             c_WeightMatch, c_AlignmentScores, c_qualityPlifsFeatures)
@@ -248,7 +253,7 @@ class QPalma:
             newSpliceAlign = zeros((num_path[exampleIdx]*dna_len,1))
             newEstAlign = zeros((est_len*num_path[exampleIdx],1))
             newWeightMatch = zeros((num_path[exampleIdx]*mm_len,1))
-            newQualityPlifsFeatures = zeros((Configuration.numQualSuppPoints*Configuration.numQualPlifs*num_path[exampleIdx],1))
+            newQualityPlifsFeatures = zeros((Configuration.totalQualSuppPoints*num_path[exampleIdx],1))
 
             #print 'newSpliceAlign'
             for i in range(dna_len*num_path[exampleIdx]):
@@ -269,7 +274,7 @@ class QPalma:
             for i in range(num_path[exampleIdx]):
                AlignmentScores[i+1] = c_AlignmentScores[i]
 
-            for i in range(Configuration.numQualSuppPoints*Configuration.numQualPlifs*num_path[exampleIdx]):
+            for i in range(Configuration.totalQualSuppPoints*num_path[exampleIdx]):
                newQualityPlifsFeatures[i] = c_qualityPlifsFeatures[i]
 
             #  equals palma up to here
@@ -303,11 +308,8 @@ class QPalma:
                weightDon, weightAcc, weightIntron = computeSpliceWeights(d, a, h, newSpliceAlign[pathNr,:].flatten().tolist()[0], don_supp, acc_supp)
 
                decodedQualityFeatures = zeros((Configuration.totalQualSuppPoints,1))
-
-               #print "Calculating pathNr %d" % pathNr
-
-               for qidx in range(Configuration.numQualPlifs*pathNr,Configuration.numQualPlifs*(pathNr+1)):
-                  decodedQualityFeatures[qidx%Configuration.numQualPlifs] = newQualityPlifsFeatures[qidx]
+               for qidx in range(Configuration.totalQualSuppPoints):
+                  decodedQualityFeatures[qidx] = newQualityPlifsFeatures[(pathNr*Configuration.totalQualSuppPoints)+qidx]
 
                # sum up positionwise loss between alignments
                for alignPosIdx in range(len(newSpliceAlign[pathNr,:])):
index 5b7aaec..c6f20ba 100644 (file)
@@ -114,7 +114,7 @@ def set_param_palma(param, train_with_intronlengthinformation,\
    # Matchmatrix
    ####################
    mmatrix = numpy.matlib.mat(param[lengthSP+donSP+accSP:lengthSP+donSP+accSP+mmatrixSP])
-   mmatrix.reshape(6,6
+   mmatrix.reshape(Configuration.sizeMatchmatrix[0],Configuration.sizeMatchmatrix[1]
 
    ####################
    # Quality Plifs
index f1c921c..4091033 100644 (file)
@@ -45,6 +45,8 @@ def plot_param(filename,train_with_intronlengthinformation=1):
         pylab.plot(plif.limits,plif.penalties,'b^-',hold=True)
     pylab.xlabel('quality value')
     pylab.ylabel('transition score')
+
+    pylab.show()
     
 
 if __name__ == '__main__':