+ incorporated feature count for prb and chastity Plifs in result_align
authorfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Sun, 30 Dec 2007 15:03:42 +0000 (15:03 +0000)
committerfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Sun, 30 Dec 2007 15:03:42 +0000 (15:03 +0000)
+ feature counts are returned as swigged array of plifs
TODO
- write a converter from wrapped swig arrays to numpy matrices
- might be useful to switch from structs to classes for convenience reasons

git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@7222 e1793c9e-67f9-0310-80fc-b846ff1f7b36

QPalmaDP/qpalma_dp.cpp
QPalmaDP/qpalma_dp.h
QPalmaDP/result_align.cpp
python/qpalma.py

index 0974136..3339376 100644 (file)
@@ -51,7 +51,6 @@ void Alignment::myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
       double* donor, int d_len, double* acceptor, int a_len, struct penalty_struct* qualityScores, 
       bool remove_duplicate_scores, bool print_matrix) {
 
-
    // printf("Entering myalign...\n");
    mlen = 6; // score matrix: length of 6 for "- A C G T N"
    dna_len =  dna_len_p + 1 ;
@@ -104,6 +103,7 @@ void Alignment::myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
   est_align = new int[(est_len-1)*nr_paths];
   mmatrix_param = new int[(mlen*mlen)*nr_paths];
   alignmentscores = new double[nr_paths]; //alignment score for each path/matrix
+  qualityScoresAllPaths = new penalty_struct[24*nr_paths];
 
   // printf("before memset...\n");
 
@@ -123,8 +123,9 @@ void Alignment::myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
     int* s_align = splice_align + (dna_len-1)*z;  //pointer
     int* e_align = est_align + (est_len-1)*z ;    //pointer
     int* mparam  = mmatrix_param + (mlen*mlen)*z; //pointer
+    penalty_struct* qparam = qualityScoresAllPaths + (24*z);
     
-    bool no_more_path = result_align(matrices, z, est_len, dna_len, &result_length, est, dna, s_align, e_align, mparam, alignmentscores, max_score_positions);
+    bool no_more_path = result_align(matrices, z, est_len, dna_len, &result_length, est, dna, prb, chastity, s_align, e_align, mparam, alignmentscores, max_score_positions, qparam);
       
       if(DNA_ARRAY != 0) {
        delete[] DNA_ARRAY;
@@ -184,7 +185,7 @@ void Alignment::myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
 }
 
 void Alignment::getAlignmentResults(int* s_align, int* e_align,
-      int* mmatrix_p, double* alignscores) {
+      int* mmatrix_p, double* alignscores, penalty_struct* qScores) {
 
    // printf("Entering getAlignmentResults...\n");
    uint splice_align_size = (dna_len-1)*nr_paths;
@@ -192,17 +193,21 @@ void Alignment::getAlignmentResults(int* s_align, int* e_align,
    uint mmatrix_param_size = (mlen*mlen)*nr_paths;
    uint alignmentscores_size = nr_paths; //alignment score for each path/matrix
 
-   for(int idx=0; idx<splice_align_size; idx++)
+   int idx;
+   for(idx=0; idx<splice_align_size; idx++)
       s_align[idx] = splice_align[idx];
 
-   for(int idx=0; idx<est_align_size; idx++)
+   for(idx=0; idx<splice_align_size; idx++)
       e_align[idx] =  est_align[idx];
 
-   for(int idx=0; idx<mmatrix_param_size; idx++)
+   for(idx=0; idx<splice_align_size; idx++)
       mmatrix_p[idx] = mmatrix_param[idx];
 
-   for(int idx=0; idx<alignmentscores_size; idx++)
+   for(idx=0; idx<splice_align_size; idx++)
       alignscores[idx] = alignmentscores[idx];
    
+   for(idx=0; idx<splice_align_size; idx++)
+      qScores[idx] = qualityScoresAllPaths[idx];
+
    // printf("Leaving getAlignmentResults...\n");
 }
index dbaf896..2e5b394 100644 (file)
@@ -33,7 +33,7 @@ void fill_matrix (int nr_paths, Pre_score* matrices[], int len_est, int len_dna,
 
 int check_char(char base);
 
-bool result_align (Pre_score* matrices[], int matrixnr, int i,  int j, int* result_length_ptr, char* est, char* dna, int* s_align, int* e_align, int* m_param, double* alignmentscores, int* max_score_positions );
+bool result_align (Pre_score* matrices[], int matrixnr, int i,  int j, int* result_length_ptr, char* est, char* dna, double* prb, double* chastity,  int* s_align, int* e_align, int* m_param, double* alignmentscores, int* max_score_positions, penalty_struct* qparam);
 
 extern void print_align(Pre_score* matrix, int length_est,  int length_dna, Align_pair* vektor, int result_length, int print_matrix);
 
@@ -76,7 +76,7 @@ class Alignment {
       int mlen;
       int nr_paths;
 
-      struct penalty_struct* matching_plifs;
+      struct penalty_struct* qualityScoresAllPaths;
 
       INT len;
       REAL *limits;
@@ -126,7 +126,7 @@ class Alignment {
       void setQualityMatrix(double* qMat, int length);
       void getDNAEST();
       void getAlignmentResults(int* s_align, int* e_align,
-      int* mmatrix_p, double* alignscores);
+      int* mmatrix_p, double* alignscores, penalty_struct* qScores);
 };
 
 #endif  // _QPALMA_DP_H_
index 2a49f68..1bfe6b1 100644 (file)
@@ -3,7 +3,31 @@
 
 using namespace std;
 
-bool result_align(Pre_score* matrices[], int z, int est_len, int dna_len, int* result_length_ptr, char* est, char* dna, int* s_align, int* e_align, int* mparam, double* alignmentscores, int* max_score_positions)
+void increaseFeatureCount(penalty_struct* qparam, int dnanum, int estnum, double estprb) {
+
+   double value = estprb;
+   int Lower = 0;
+   int idx;
+   for (idx=0;idx<qparam->len;idx++) {
+      if (qparam->limits[idx] <= value)
+         Lower++;
+   }
+
+   if (Lower == 0)
+         qparam->penalties[0] += 1;
+
+   if (Lower == qparam->len)
+         qparam->penalties[qparam->len-1] += 1;
+
+   Lower -= 1;
+   int Upper = Lower+1; // x-werte bleiben fest
+   double weightup  = 1.0*(value - qparam->limits[Lower]) / (qparam->limits[Upper] - qparam->limits[Lower]);
+   double weightlow = 1.0*(qparam->limits[Upper] - value) / (qparam->limits[Upper] - qparam->limits[Lower]);
+   qparam->penalties[Upper] = qparam->penalties[Upper] + weightup;
+   qparam->penalties[Lower] = qparam->penalties[Lower] + weightlow;
+}
+
+bool result_align(Pre_score* matrices[], int z, int est_len, int dna_len, int* result_length_ptr, char* est, char* dna, double* prb, double* chastity, int* s_align, int* e_align, int* mparam, double* alignmentscores, int* max_score_positions, penalty_struct* qparam)
 //Gibt 1 zurueck, wenn kein weiterer Pfad existiert, sonst 0
 
 {  
@@ -14,6 +38,9 @@ bool result_align(Pre_score* matrices[], int z, int est_len, int dna_len, int* r
   //printf("in result_align: z=%d:(%d, %d)\n", z, startend[3], startend[4]) ;
   int dnanum ; //using check_char: letter -> number
   int estnum ; //using check_char: letter -> number
+
+  double prbnum ;
+  double chastitynum ;
   
   int dna_pos ;
   int est_pos ;
@@ -74,6 +101,11 @@ bool result_align(Pre_score* matrices[], int z, int est_len, int dna_len, int* r
       
       dnanum = check_char(dna[j-1]) ; //-1 because index starts with 0
       estnum = check_char(est[i-1]) ; //-1 because index starts with 0
+
+      prbnum = prb[i-1];
+      chastitynum = chastity[i-1];
+      increaseFeatureCount(qparam,dnanum,estnum,prbnum);
+
       mparam[mlen*dnanum +estnum] ++ ;
     }
     else if ((prev_i == (i)) && (prev_j == (j-1))) {// gap on EST
@@ -94,6 +126,11 @@ bool result_align(Pre_score* matrices[], int z, int est_len, int dna_len, int* r
 
       dnanum = 0 ; //gap
       estnum = check_char(est[i-1]) ; //-1 because index starts with 0
+
+      prbnum = prb[i-1];
+      chastitynum = chastity[i-1];
+      increaseFeatureCount(qparam,dnanum,estnum,prbnum);
+
       mparam[mlen*dnanum +estnum] ++ ;
     }
     else {// splice site
index a411ad2..8d7af91 100644 (file)
@@ -69,7 +69,6 @@ def initializeQualityScoringFunctions(numPlifs,numSuppPoints):
       curPlif = curPlif.convert2SWIG()
       qualityPlifs[idx] = curPlif
 
-   qualityPlifs = QPalmaDP.createPenaltyArrayFromList(qualityPlifs)
    return qualityPlifs
 
 class QPalma:
@@ -189,6 +188,8 @@ class QPalma:
       numPlifs = 24
       numSuppPoints = 30
 
+      qualityPlifs = initializeQualityScoringFunctions(numPlifs,numSuppPoints)
+
       #############################################################################################
       # Training
       #############################################################################################
@@ -214,6 +215,7 @@ class QPalma:
 
             # Berechne die Parameter des wirklichen Alignments (but with untrained d,a,h ...)    
             trueSpliceAlign, trueWeightMatch = computeSpliceAlign(dna, exons)
+            trueSpliceAlign, trueWeightMatch, trueQualityPlifs = computeSpliceAlignWithQuality(dna, exons, qualityPlifs)
             
             # Calculate the weights
             trueWeightDon, trueWeightAcc, trueWeightIntron = computeSpliceWeights(d, a, h, trueSpliceAlign, don_supp, acc_supp)
@@ -269,15 +271,13 @@ class QPalma:
             qualityMat = QPalmaDP.createDoubleArrayFromList(qualityMatrix)
             currentAlignment.setQualityMatrix(qualityMat,self.numQualSuppPoints)
 
-            qualityPlifs = initializeQualityScoringFunctions(numPlifs,numSuppPoints)
-
-            # pdb.set_trace()
+            c_qualityPlifs = QPalmaDP.createPenaltyArrayFromList(qualityPlifs)
 
             #print 'PYTHON: Calling myalign...'
             # calculates SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest
             currentAlignment.myalign( num_path[exampleIdx], dna, dna_len,\
              est, est_len, prb, chastity, ps, matchmatrix, mm_len, donor, d_len,\
-             acceptor, a_len, qualityPlifs, remove_duplicate_scores, print_matrix)
+             acceptor, a_len, c_qualityPlifs, remove_duplicate_scores, print_matrix)
             #print 'PYTHON: After myalign call...'
 
             c_SpliceAlign       = QPalmaDP.createIntArrayFromList([0]*(dna_len*num_path[exampleIdx]))
@@ -285,18 +285,29 @@ class QPalma:
             c_WeightMatch       = QPalmaDP.createIntArrayFromList([0]*(mm_len*num_path[exampleIdx]))
             c_AlignmentScores   = QPalmaDP.createDoubleArrayFromList([.0]*num_path[exampleIdx])
 
-            currentAlignment.getAlignmentResults(c_SpliceAlign, c_EstAlign, c_WeightMatch, c_AlignmentScores)
+            emptyPlif = Plf()
+            emptyPlif = curPlif.convert2SWIG()
+            c_qualityPlifs = QPalmaDP.createPenaltyArrayFromList(emptyPlif*(24*num_path[exampleIdx]))
+
+            currentAlignment.getAlignmentResults(c_SpliceAlign, c_EstAlign,\
+            c_WeightMatch, c_AlignmentScores, c_qualityPlifs)
             del currentAlignment
 
             newSpliceAlign = zeros((num_path[exampleIdx]*dna_len,1))
+            newEstAlign = zeros((est_len*num_path[exampleIdx],1))
             newWeightMatch = zeros((num_path[exampleIdx]*mm_len,1))
 
-            print 'spliceAlign'
+            #print 'newSpliceAlign'
             for i in range(dna_len*num_path[exampleIdx]):
                newSpliceAlign[i] = c_SpliceAlign[i]
             #   print '%f' % (spliceAlign[i])
 
-            print 'weightMatch'
+            #print 'newEstAlign'
+            for i in range(est_len*num_path[exampleIdx]):
+               newEstAlign[i] = c_EstAlign[i]
+            #   print '%f' % (spliceAlign[i])
+
+            #print 'weightMatch'
             for i in range(mm_len*num_path[exampleIdx]):
                newWeightMatch[i] = c_WeightMatch[i]
             #   print '%f' % (weightMatch[i])
@@ -323,6 +334,9 @@ class QPalma:
 
                weightDon, weightAcc, weightIntron = computeSpliceWeights(d, a, h, newSpliceAlign[pathNr,:].flatten().tolist()[0], don_supp, acc_supp)
 
+               #
+               qualityWeights = computeSpliceQualityWeights()
+
                # sum up positionwise loss between alignments
                for alignPosIdx in range(len(newSpliceAlign[pathNr,:])):
                   if newSpliceAlign[pathNr,alignPosIdx] != trueSpliceAlign[alignPosIdx]: