+ parameter transfer to python layer works
authorfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Mon, 7 Jan 2008 10:35:42 +0000 (10:35 +0000)
committerfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Mon, 7 Jan 2008 10:35:42 +0000 (10:35 +0000)
+ first version of feature count is running
TODO
+ stack all Plif values into the weight vector
+ implement feature count for dna gaps

git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@7243 e1793c9e-67f9-0310-80fc-b846ff1f7b36

QPalmaDP/QPalmaDP.i
QPalmaDP/qpalma_dp.cpp
QPalmaDP/qpalma_dp.h
QPalmaDP/result_align.cpp
python/qpalma.py

index 8aa2554..1e7c4e8 100644 (file)
@@ -18,6 +18,7 @@
 
 %array_class(int, intArray) ;
 %array_class(double, doubleArray) ;
+%array_functions(double, doubleFArray) ;
 %array_functions(struct penalty_struct, penaltyArray) ;
 
 /*
index 1fa3a4f..f5e2977 100644 (file)
@@ -6,8 +6,6 @@ using namespace std;
   myalign([nr_paths], dna, est, {h}, matchmatrix, donor, acceptor, remove_duplicate_scores, ...
   print_matrix) */
 
-struct penalty_struct *qualityScores = 0;
-
 Alignment::Alignment() {
       len = 0;
       limits = 0;
@@ -24,9 +22,7 @@ Alignment::Alignment() {
       est_align = 0;
       mmatrix_param = 0;
       alignmentscores = 0;
-      qualityMatrix = 0;
-
-      ::qualityScores = new struct penalty_struct[24];
+      qualityScoresAllPaths = 0;
 }
 
 void Alignment::setQualityMatrix(double* qMat, int length){
@@ -37,13 +33,6 @@ void Alignment::setQualityMatrix(double* qMat, int length){
       qualityMatrix[i] = qMat[i];
 }
 
-
-void Alignment::setMatchPlifs(struct penalty_struct match_plifs, int idx) {
-   struct penalty_struct *currentPlif = new struct penalty_struct();
-   copy_penalty_struct(&match_plifs,currentPlif);
-   ::qualityScores[idx] = (*currentPlif);
-}
-
 void Alignment::getDNAEST(){}
 
 void Alignment::myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
@@ -98,31 +87,42 @@ void Alignment::myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
   int result_length; //Eine Variable fuer alle Matrizen
 
   // printf("before init of splice_align and rest...\n");
+  //
+   splice_align_size = (dna_len-1)*nr_paths;
+   est_align_size = (est_len-1)*nr_paths;
+   mmatrix_param_size = (mlen*mlen)*nr_paths;
+   alignmentscores_size = nr_paths; //alignment score for each path/matrix
+   qScores_size = 24*nr_paths; //alignment score for each path/matrix
+
+   //printf("dna_len is %d est_len is %d mmatrix_len is %d",splice_align_size, est_align_size, mmatrix_param_size);
 
-  splice_align = new int[(dna_len-1)*nr_paths];
-  est_align = new int[(est_len-1)*nr_paths];
-  mmatrix_param = new int[(mlen*mlen)*nr_paths];
+  splice_align = new int[splice_align_size];
+  est_align = new int[est_align_size];
+  mmatrix_param = new int[mmatrix_param_size];
   alignmentscores = new double[nr_paths]; //alignment score for each path/matrix
-  qualityScoresAllPaths = new penalty_struct[24*nr_paths];
+  qualityScoresAllPaths = new penalty_struct[qScores_size];
   int qidx;
   for(qidx=0;qidx<24*nr_paths;qidx++) {
       penalty_struct p;
+      init_penalty_struct(p);
+      p.len = 10;
+      p.limits = (REAL*) calloc(p.len,sizeof(REAL));
+      p.penalties = (REAL*) calloc(p.len,sizeof(REAL));
       qualityScoresAllPaths[qidx] = p;
   }
 
   // printf("before memset...\n");
-
   memset((char*)splice_align, -1, (dna_len-1)*nr_paths*sizeof(int)); // fills splice_align with zeros
   memset((char*)est_align, -1, (est_len-1)*nr_paths*sizeof(int)); // fills est_align with zeros
   memset((char*)mmatrix_param, 0, mlen*mlen*nr_paths*sizeof(int)); //fills mmatrix_param with zeros
   memset(alignmentscores, -1, nr_paths*sizeof(double)); //fills alignmentscores with zeros
-  
   // printf("after memset...\n");
+  
     // dnaest
    double *DNA_ARRAY = 0;
    double *EST_ARRAY = 0;
 
-  for (int z=0; z<nr_paths; z++) {      
+  for (int z=0; z<nr_paths; z++) {
     result_length = 0 ;
     
     int* s_align = splice_align + (dna_len-1)*z;  //pointer
@@ -130,13 +130,11 @@ void Alignment::myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
     int* mparam  = mmatrix_param + (mlen*mlen)*z; //pointer
     penalty_struct* qparam = qualityScoresAllPaths + (24*z);
 
-    
     printf("before call to result_align...\n");
-  
     bool no_more_path = result_align(matrices, z, est_len, dna_len, &result_length, est, dna, prb, chastity, s_align, e_align, mparam, alignmentscores, max_score_positions, qparam);
     printf("after call to result_align...\n");
-      
-      if(DNA_ARRAY != 0) {
+
+   if(DNA_ARRAY != 0) {
        delete[] DNA_ARRAY;
        delete[] EST_ARRAY;
     }
@@ -182,6 +180,14 @@ void Alignment::myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
     
   } //end of z
 
+   // printf("Content of qualityScoresAllPaths\n");
+   // for(int iii=0;iii<qScores_size;iii++) {
+   //    for(int jjj=0;jjj<qualityScoresAllPaths[iii].len;jjj++) {
+   //       printf("%f ",qualityScoresAllPaths[iii].penalties[jjj]);
+   //    }
+   //    printf("\n");
+   // }
+
    if(DNA_ARRAY != 0) {
     delete[] DNA_ARRAY;
     delete[] EST_ARRAY;
@@ -196,12 +202,7 @@ void Alignment::myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
 void Alignment::getAlignmentResults(int* s_align, int* e_align,
       int* mmatrix_p, double* alignscores, penalty_struct* qScores) {
 
-   printf("Entering getAlignmentResults...\n");
-   uint splice_align_size = (dna_len-1)*nr_paths;
-   uint est_align_size = (est_len-1)*nr_paths;
-   uint mmatrix_param_size = (mlen*mlen)*nr_paths;
-   uint alignmentscores_size = nr_paths; //alignment score for each path/matrix
-   uint qScores_size = 24*nr_paths; //alignment score for each path/matrix
+   //printf("Entering getAlignmentResults...\n");
 
    int idx;
    for(idx=0; idx<splice_align_size; idx++)
@@ -213,11 +214,13 @@ void Alignment::getAlignmentResults(int* s_align, int* e_align,
    for(idx=0; idx<mmatrix_param_size; idx++)
       mmatrix_p[idx] = mmatrix_param[idx];
 
-   for(idx=0; idx<splice_align_size; idx++)
+   for(idx=0; idx<alignmentscores_size; idx++)
       alignscores[idx] = alignmentscores[idx];
    
-   for(idx=0; idx<qScores_size; idx++)
+   for(idx=0; idx<qScores_size; idx++) {
+      init_penalty_struct(qScores[idx]);
       qScores[idx] = qualityScoresAllPaths[idx];
-
-   printf("Leaving getAlignmentResults...\n");
+      copy_penalty_struct(&qualityScoresAllPaths[idx],&qScores[idx]);
+   }
+   //printf("Leaving getAlignmentResults...\n");
 }
index 2e5b394..80173a7 100644 (file)
@@ -76,6 +76,12 @@ class Alignment {
       int mlen;
       int nr_paths;
 
+      uint splice_align_size ;
+      uint est_align_size ;
+      uint mmatrix_param_size ;
+      uint alignmentscores_size ;
+      uint qScores_size ;
+
       struct penalty_struct* qualityScoresAllPaths;
 
       INT len;
@@ -103,9 +109,6 @@ class Alignment {
 
          if(alignmentscores != 0)
             delete[] alignmentscores;
-
-         if(qualityMatrix != 0)
-            delete[] qualityMatrix;
       }
 
       void myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
@@ -114,8 +117,6 @@ class Alignment {
       int a_len, struct penalty_struct* qualityScores, bool remove_duplicate_scores,
       bool print_matrix);
 
-      void setMatchPlifs(struct penalty_struct match_plifs,int idx);
-
       void penSetLength(int l) { len = l; }
       void penSetLimits(REAL* lts) { 
          limits = new REAL[len];
index 99b3238..5f2a054 100644 (file)
@@ -5,7 +5,12 @@ using namespace std;
 
 void increaseFeatureCount(penalty_struct* qparam, int dnanum, int estnum, double estprb) {
 
-   penalty_struct currentStruct = qparam[estnum*6+dnanum];
+   if(dnanum == 0) {
+      return;
+   }
+
+   penalty_struct currentStruct = qparam[(estnum-1)*6+(dnanum-1)];
+   //printf("check_char is %d\n",(estnum-1)*6+(dnanum-1));
 
    double value = estprb;
    int Lower = 0;
@@ -106,7 +111,7 @@ bool result_align(Pre_score* matrices[], int z, int est_len, int dna_len, int* r
 
       prbnum = prb[i-1];
       chastitynum = chastity[i-1];
-      //increaseFeatureCount(qparam,dnanum,estnum,prbnum);
+      increaseFeatureCount(qparam,dnanum,estnum,prbnum);
 
       mparam[mlen*dnanum +estnum] ++ ;
     }
@@ -131,7 +136,7 @@ bool result_align(Pre_score* matrices[], int z, int est_len, int dna_len, int* r
 
       prbnum = prb[i-1];
       chastitynum = chastity[i-1];
-      //increaseFeatureCount(qparam,dnanum,estnum,prbnum);
+      increaseFeatureCount(qparam,dnanum,estnum,prbnum);
 
       mparam[mlen*dnanum +estnum] ++ ;
     }
index d615b6a..47f0dd0 100644 (file)
@@ -86,25 +86,7 @@ class QPalma:
       gen_file= '%s/genome.config' % self.ARGS.basedir
 
       ginfo_filename = 'genome_info.pickle'
-
-      if not os.path.exists(ginfo_filename):
-         cmd = ['']*4
-         cmd[0] = 'addpath /fml/ag-raetsch/home/fabio/svn/tools/utils'
-         cmd[1] = 'addpath /fml/ag-raetsch/home/fabio/svn/tools/genomes'
-         cmd[2] = 'genome_info = init_genome(\'%s\')' % gen_file
-         cmd[3] = 'save genome_info.mat genome_info'  
-         full_cmd = "matlab -nojvm -nodisplay -r \"%s; %s; %s; %s; exit\"" % (cmd[0],cmd[1],cmd[2],cmd[3])
-
-         obj = subprocess.Popen(full_cmd,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
-         out,err = obj.communicate()
-         assert err == '', 'An error occured!\n%s'%err
-
-         ginfo = scipy.io.loadmat('genome_info.mat')
-         self.genome_info = ginfo['genome_info']
-         cPickle.dump(self.genome_info,open(ginfo_filename,'w+'))
-
-      else:
-         self.genome_info = cPickle.load(open(ginfo_filename))
+      self.genome_info = fetch_genome_info(ginfo_filename)
 
       self.plog('genome_info.basedir is %s\n'%self.genome_info.basedir)
 
@@ -308,6 +290,8 @@ class QPalma:
 
             print "Fetched results from c-code"
 
+            #print '%d %d %d' % (dna_len*num_path[exampleIdx],est_len*num_path[exampleIdx],mm_len*num_path[exampleIdx])
+
             newSpliceAlign = zeros((num_path[exampleIdx]*dna_len,1))
             newEstAlign = zeros((est_len*num_path[exampleIdx],1))
             newWeightMatch = zeros((num_path[exampleIdx]*mm_len,1))
@@ -334,7 +318,7 @@ class QPalma:
 
             print 'newQualityPlifs'
             for i in range(num_path[exampleIdx]*24):
-               newQualityPlifs[i+1] = c_qualityPlifs[i]
+               newQualityPlifs[i] = QPalmaDP.penaltyArray_getitem(c_qualityPlifs, i)
 
             print "Calling destructors"
 
@@ -364,8 +348,14 @@ class QPalma:
 
                weightDon, weightAcc, weightIntron = computeSpliceWeights(d, a, h, newSpliceAlign[pathNr,:].flatten().tolist()[0], don_supp, acc_supp)
 
-               #
-               qualityWeights = computeSpliceQualityWeights()
+               testPlif = newQualityPlifs[24*pathNr]
+               print "testPlif.penalties"
+               print type(testPlif)
+               for tidx in range(testPlif.len):
+                  #elem = testPlif.penalties[tidx]
+                  elem = QPalmaDP.doubleFArray_getitem(testPlif.penalties, tidx)
+                  print '%f ' % elem, 
+               print
 
                # sum up positionwise loss between alignments
                for alignPosIdx in range(len(newSpliceAlign[pathNr,:])):
@@ -447,6 +437,26 @@ class QPalma:
       self.logfh.close()
       print 'Training completed'
 
+def fetch_genome_info(ginfo_filename):
+   if not os.path.exists(ginfo_filename):
+      cmd = ['']*4
+      cmd[0] = 'addpath /fml/ag-raetsch/home/fabio/svn/tools/utils'
+      cmd[1] = 'addpath /fml/ag-raetsch/home/fabio/svn/tools/genomes'
+      cmd[2] = 'genome_info = init_genome(\'%s\')' % gen_file
+      cmd[3] = 'save genome_info.mat genome_info'  
+      full_cmd = "matlab -nojvm -nodisplay -r \"%s; %s; %s; %s; exit\"" % (cmd[0],cmd[1],cmd[2],cmd[3])
+
+      obj = subprocess.Popen(full_cmd,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
+      out,err = obj.communicate()
+      assert err == '', 'An error occured!\n%s'%err
+
+      ginfo = scipy.io.loadmat('genome_info.mat')
+      cPickle.dump(self.genome_info,open(ginfo_filename,'w+'))
+      return ginfo['genome_info']
+
+   else:
+      return cPickle.load(open(ginfo_filename))
+
 if __name__ == '__main__':
    qpalma = QPalma()
    qpalma.run()