+ added code for negative strand processing
[qpalma.git] / dyn_prog / qpalma_dp.cpp
index c2cae37..a20af18 100644 (file)
@@ -1,3 +1,4 @@
+
 #include "qpalma_dp.h"
 #include <cstring>
 
@@ -24,7 +25,7 @@ Alignment::Alignment(int numQPlifs, int numq, bool use_qscores) {
       est_align            = 0;
       mmatrix_param        = 0;
       alignmentscores      = 0;
-      qualityScoresAllPaths = 0;
+      qualityFeaturesAllPaths = 0;
       mlen = 6; // score matrix: length of 6 for "- A C G T N"
 
       numQualSuppPoints = numq;
@@ -60,8 +61,10 @@ void Alignment::myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
    currentMode = NORMAL;
 
     // dnaest
-   double *DNA_ARRAY = 0;
-   double *EST_ARRAY = 0;
+   DNA_ARRAY = 0;
+   EST_ARRAY = 0;
+
+  //printf("[qpalma_dp] read is: %s\n",est);
   
   //possible donor positions
   int nr_donor_sites = 0 ;
@@ -96,14 +99,13 @@ void Alignment::myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
    //      printf("%f ",pidx,p.penalties[pidx]);
    //   printf("\n");
    //}
-   //
 
    //int h_idx;
    //for(h_idx=0;h_idx<h.len;h_idx++)
    //   printf("%d %f %f\n",h_idx,h.limits[h_idx],h.penalties[h_idx]);
   
   //printf("calling fill_matrix...\n");
-  fill_matrix(nr_paths, matrices, est_len, dna_len, est, dna, prb, &h, matchmatrix, qualityScores, donor, acceptor, remove_duplicate_scores, nr_donor_sites, donor_sites, max_score_positions,currentMode);
+  fill_matrix(nr_paths, matrices, est_len, dna_len, est, dna, prb, &h, matchmatrix, qualityScores, donor, acceptor, remove_duplicate_scores, nr_donor_sites, donor_sites, max_score_positions);
   //printf("after call to fill_matrix...\n");
 
   /***************************************************************************/ 
@@ -142,7 +144,7 @@ void Alignment::myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
   memset(alignmentscores, -1, nr_paths*sizeof(double)); //fills alignmentscores with zeros
   //printf("after memset...\n");
   
-  qualityScoresAllPaths= new penalty_struct*[nr_paths];
+  qualityFeaturesAllPaths= new penalty_struct*[nr_paths];
 
   for (int z=0; z<nr_paths; z++) {
     result_length = 0 ;
@@ -151,87 +153,94 @@ void Alignment::myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
     int* e_align = est_align + (est_len-1)*z ;    //pointer
     int* mparam  = mmatrix_param + mmatrix_size*z; //pointer
 
-   qualityScoresAllPaths[z] = new penalty_struct[numPlifs];
+   qualityFeaturesAllPaths[z] = new penalty_struct[numPlifs];
 
    int qidx, pidx;
    for(qidx=0;qidx<numPlifs;qidx++) {
       penalty_struct p;
+      init_penalty_struct(p);
       p.len = numQualSuppPoints;
-      p.limits = (REAL*) calloc(p.len,sizeof(REAL));
+      p.limits = (double*) calloc(p.len,sizeof(double));
 
       for(pidx=0;pidx<p.len;pidx++)
-         p.limits[pidx] = qualityScores[qidx%numPlifs].limits[pidx];
+         p.limits[pidx] = qualityScores[qidx].limits[pidx];
 
-      p.penalties = (REAL*) calloc(p.len,sizeof(REAL));
-      qualityScoresAllPaths[z][qidx] = p;
+      p.penalties = (double*) calloc(p.len,sizeof(double));
+      qualityFeaturesAllPaths[z][qidx] = p;
    }
 
    //penalty_struct* qparam = qualityScoresAllPaths + (numPlifs*z);
 
     //printf("before call to result_align...\n");
-    bool no_more_path = result_align(matrices, z, est_len, dna_len, &result_length, est, dna, prb, chastity, s_align, e_align, mparam, alignmentscores, max_score_positions, qualityScoresAllPaths[z] , currentMode);
+    bool no_more_path = result_align(matrices, z, est_len, dna_len, &result_length, est, dna, prb, chastity, s_align, e_align, mparam, alignmentscores, max_score_positions, qualityFeaturesAllPaths[z] , currentMode);
     //printf("after call to result_align...\n");
 
    //printf("z is %d\n",z);
+   //int len;
    //for(qidx=0;qidx<numPlifs;qidx++) {
    //   penalty_struct p;
-   //   p = qualityScoresAllPaths[qidx+numPlifs*z];
-   //   printf("%d ",qidx);
+   //   p = qualityScoresAllPaths[z][qidx];
+   //   printf("%d: ",qidx);
    //   for(pidx=0;pidx<p.len;pidx++)
-   //      printf("%f ",pidx,p.penalties[pidx]);
+   //      printf("%f ",p.limits[pidx]);
+   //   printf("\n");
+
+   //   for(pidx=0;pidx<p.len;pidx++)
+   //      printf("%f ",p.penalties[pidx]);
    //   printf("\n");
    //}
 
-   //if(DNA_ARRAY != 0) {
-   //    delete[] DNA_ARRAY;
-   //    delete[] EST_ARRAY;
-   // }
-
-   //DNA_ARRAY = new double[result_length];
-   //EST_ARRAY = new double[result_length];
-
-   // //backtracking
-   // int i = max_score_positions[2*z] ; //i (est)
-   // int j = max_score_positions[2*z +1] ; //j (dna)
-   // int prev_i = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_i; 
-   // int prev_j = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_j;
-   // int prev_m_no = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_matrix_no;
-   // 
-   // for (int ii=result_length; ii>0; ii--) {
-   //   if ((prev_i == (i-1)) && (prev_j == (j-1))) { // match or mismatch
-       //DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
-       //EST_ARRAY[ii-1] = check_char(est[i-1]) ;
-   //   }
-   //   else if ((prev_i == (i)) && (prev_j == (j-1))) {// gap on EST_ARRAY
-       //DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
-       //EST_ARRAY[ii-1] = 0 ; //gap
-   //   }
-   //   else if ((prev_i == (i-1)) && (prev_j == (j))) { // gap on DNA_ARRAY
-       //DNA_ARRAY[ii-1] = 0 ; //gap
-       //EST_ARRAY[ii-1] = check_char(est[i-1]) ;
-   //   }
-   //   else {// splice site
-       //for (j; j > prev_j; j--) {
-       //  DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
-       //  EST_ARRAY[ii-1] = 6 ; //intron
-       //  ii-- ;
-       //}
-       //ii++ ; // last ii-- too much (because done in for-loop)       
-   //   }
-   //   
-   //   i = prev_i;
-   //   j = prev_j;    
-   //   prev_i = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_i; 
-   //   prev_j = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_j; 
-   //   prev_m_no = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_matrix_no;
-   // }
-   // 
-   } //end of z
+   if(z==0) {
+
+   if(DNA_ARRAY != 0) {
+       delete[] DNA_ARRAY;
+       delete[] EST_ARRAY;
+    }
 
-   //if(DNA_ARRAY != 0) {
-   // delete[] DNA_ARRAY;
-   // delete[] EST_ARRAY;
-   // }
+   result_len = result_length;
+   
+   DNA_ARRAY = new int[result_length];
+   EST_ARRAY = new int[result_length];
+
+    //backtracking
+    int i = max_score_positions[2*z] ; //i (est)
+    int j = max_score_positions[2*z +1] ; //j (dna)
+    int prev_i = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_i; 
+    int prev_j = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_j;
+    int prev_m_no = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_matrix_no;
+    
+    for (int ii=result_length; ii>0; ii--) {
+      if ((prev_i == (i-1)) && (prev_j == (j-1))) { // match or mismatch
+       DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
+       EST_ARRAY[ii-1] = check_char(est[i-1]) ;
+      }
+      else if ((prev_i == (i)) && (prev_j == (j-1))) {// gap on EST_ARRAY
+       DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
+       EST_ARRAY[ii-1] = 0 ; //gap
+      }
+      else if ((prev_i == (i-1)) && (prev_j == (j))) { // gap on DNA_ARRAY
+       DNA_ARRAY[ii-1] = 0 ; //gap
+       EST_ARRAY[ii-1] = check_char(est[i-1]) ;
+      }
+      else {// splice site
+       for (j; j > prev_j; j--) {
+         DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
+         EST_ARRAY[ii-1] = 6 ; //intron
+         ii-- ;
+       }
+       ii++ ; // last ii-- too much (because done in for-loop) 
+      }
+      
+      i = prev_i;
+      j = prev_j;    
+      prev_i = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_i; 
+      prev_j = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_j; 
+      prev_m_no = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_matrix_no;
+    }
+
+    } // end of "if z == 0"
+    
+   } //end of z
 
    for (int i=nr_paths-1;i>=0; i--)
       delete[] matrices[i];
@@ -259,24 +268,28 @@ void Alignment::getAlignmentResults(int* s_align, int* e_align,
       for (int z=0; z<nr_paths; z++) {
          for(int estChar=1;estChar<6;estChar++) {
             for(int dnaChar=0;dnaChar<6;dnaChar++) {
+
                int currentPos = (estChar-1)*6+dnaChar;
-               currentPlif = qualityScoresAllPaths[z][currentPos];
+               currentPlif = qualityFeaturesAllPaths[z][currentPos];
+
                for(int pidx=0; pidx<currentPlif.len; pidx++) {
                   qScores[ctr] = currentPlif.penalties[pidx];
+                  //printf("%f ",qScores[ctr]);
                   ctr++;
                }
+               //printf("\n");
       }}}
+
+      //printf("\nctr is %d\n",ctr);
    }
+   //printf("Leaving getAlignmentResults...\n");
+}
 
-   //for(idx=0; idx<numPathsPlifs; idx++) {
-   //   currentPlif = qualityScoresAllPaths[idx];
-   //   //printf("Size is %dx%d\n",numPathsPlifs,currentPlif.len);
-   //   for(pidx=0; pidx<currentPlif.len; pidx++) {
-   //      qScores[ctr] = currentPlif.penalties[pidx];
-   //      ctr++;
-   //   }
-   //}
+void Alignment::getAlignmentArrays(int* dna_align, int* est_align) {
 
-   //printf("\nctr is %d\n",ctr);
-   //printf("Leaving getAlignmentResults...\n");
+   int idx;
+   for(idx=0; idx<result_len; idx++) {
+      dna_align[idx] = DNA_ARRAY[idx];
+      est_align[idx] = EST_ARRAY[idx];
+   }
 }