+ rearranged code
authorfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Wed, 6 Feb 2008 14:47:29 +0000 (14:47 +0000)
committerfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Wed, 6 Feb 2008 14:47:29 +0000 (14:47 +0000)
+ added capability produce "pretty-print" alignment output using palma scripts

git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@7694 e1793c9e-67f9-0310-80fc-b846ff1f7b36

dyn_prog/qpalma_dp.cpp
dyn_prog/qpalma_dp.h
qpalma/Configuration.py
qpalma/SIQP.py
qpalma/SIQP_CPX.py
scripts/qpalma_main.py
tools/plot_param.py

index c2cae37..e2bdb35 100644 (file)
@@ -60,8 +60,8 @@ void Alignment::myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
    currentMode = NORMAL;
 
     // dnaest
-   double *DNA_ARRAY = 0;
-   double *EST_ARRAY = 0;
+   DNA_ARRAY = 0;
+   EST_ARRAY = 0;
   
   //possible donor positions
   int nr_donor_sites = 0 ;
@@ -181,57 +181,58 @@ void Alignment::myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
    //      printf("%f ",pidx,p.penalties[pidx]);
    //   printf("\n");
    //}
+   //
+   if(z==0) {
 
-   //if(DNA_ARRAY != 0) {
-   //    delete[] DNA_ARRAY;
-   //    delete[] EST_ARRAY;
-   // }
-
-   //DNA_ARRAY = new double[result_length];
-   //EST_ARRAY = new double[result_length];
-
-   // //backtracking
-   // int i = max_score_positions[2*z] ; //i (est)
-   // int j = max_score_positions[2*z +1] ; //j (dna)
-   // int prev_i = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_i; 
-   // int prev_j = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_j;
-   // int prev_m_no = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_matrix_no;
-   // 
-   // for (int ii=result_length; ii>0; ii--) {
-   //   if ((prev_i == (i-1)) && (prev_j == (j-1))) { // match or mismatch
-       //DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
-       //EST_ARRAY[ii-1] = check_char(est[i-1]) ;
-   //   }
-   //   else if ((prev_i == (i)) && (prev_j == (j-1))) {// gap on EST_ARRAY
-       //DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
-       //EST_ARRAY[ii-1] = 0 ; //gap
-   //   }
-   //   else if ((prev_i == (i-1)) && (prev_j == (j))) { // gap on DNA_ARRAY
-       //DNA_ARRAY[ii-1] = 0 ; //gap
-       //EST_ARRAY[ii-1] = check_char(est[i-1]) ;
-   //   }
-   //   else {// splice site
-       //for (j; j > prev_j; j--) {
-       //  DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
-       //  EST_ARRAY[ii-1] = 6 ; //intron
-       //  ii-- ;
-       //}
-       //ii++ ; // last ii-- too much (because done in for-loop)       
-   //   }
-   //   
-   //   i = prev_i;
-   //   j = prev_j;    
-   //   prev_i = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_i; 
-   //   prev_j = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_j; 
-   //   prev_m_no = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_matrix_no;
-   // }
-   // 
-   } //end of z
+   if(DNA_ARRAY != 0) {
+       delete[] DNA_ARRAY;
+       delete[] EST_ARRAY;
+    }
+
+   result_len = result_length;
+   
+   DNA_ARRAY = new int[result_length];
+   EST_ARRAY = new int[result_length];
+
+    //backtracking
+    int i = max_score_positions[2*z] ; //i (est)
+    int j = max_score_positions[2*z +1] ; //j (dna)
+    int prev_i = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_i; 
+    int prev_j = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_j;
+    int prev_m_no = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_matrix_no;
+    
+    for (int ii=result_length; ii>0; ii--) {
+      if ((prev_i == (i-1)) && (prev_j == (j-1))) { // match or mismatch
+       DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
+       EST_ARRAY[ii-1] = check_char(est[i-1]) ;
+      }
+      else if ((prev_i == (i)) && (prev_j == (j-1))) {// gap on EST_ARRAY
+       DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
+       EST_ARRAY[ii-1] = 0 ; //gap
+      }
+      else if ((prev_i == (i-1)) && (prev_j == (j))) { // gap on DNA_ARRAY
+       DNA_ARRAY[ii-1] = 0 ; //gap
+       EST_ARRAY[ii-1] = check_char(est[i-1]) ;
+      }
+      else {// splice site
+       for (j; j > prev_j; j--) {
+         DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
+         EST_ARRAY[ii-1] = 6 ; //intron
+         ii-- ;
+       }
+       ii++ ; // last ii-- too much (because done in for-loop) 
+      }
+      
+      i = prev_i;
+      j = prev_j;    
+      prev_i = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_i; 
+      prev_j = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_j; 
+      prev_m_no = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_matrix_no;
+    }
 
-   //if(DNA_ARRAY != 0) {
-   // delete[] DNA_ARRAY;
-   // delete[] EST_ARRAY;
-   // }
+    } // end of "if z == 0"
+    
+   } //end of z
 
    for (int i=nr_paths-1;i>=0; i--)
       delete[] matrices[i];
@@ -280,3 +281,12 @@ void Alignment::getAlignmentResults(int* s_align, int* e_align,
    //printf("\nctr is %d\n",ctr);
    //printf("Leaving getAlignmentResults...\n");
 }
+
+void Alignment::getAlignmentArrays(int* dna_align, int* est_align) {
+
+   int idx;
+   for(idx=0; idx<result_len; idx++) {
+      dna_align[idx] = DNA_ARRAY[idx];
+      est_align[idx] = EST_ARRAY[idx];
+   }
+}
index 085d6af..69a0ef5 100644 (file)
@@ -78,6 +78,10 @@ class Alignment {
       int mlen;
       int nr_paths;
 
+      int result_len;
+      int* DNA_ARRAY;
+      int* EST_ARRAY;
+
       int numQualSuppPoints;
       int numPlifs;
       bool use_quality_scores;
@@ -127,6 +131,9 @@ class Alignment {
       void getDNAEST();
       void getAlignmentResults(int* s_align, int* e_align,
       int* mmatrix_p, double* alignscores, double* qScores);
+
+      int getResultLength() { return result_len; }
+      void getAlignmentArrays(int* dna_align, int* est_align);
 };
 
 #endif  // _QPALMA_DP_H_
index eb913fd..6cf2fe7 100644 (file)
@@ -26,6 +26,8 @@ max_intron_len = 2000
 min_svm_score = 0.0 
 max_svm_score = 1.0
 
+numConstraintsPerRound = 100
+
 ###############################################################################
 # 
 # CHOOSING THE MODE 
@@ -80,7 +82,7 @@ mode = 'using_quality_scores'
 numLengthSuppPoints  = 20 
 numDonSuppPoints     = 20
 numAccSuppPoints     = 20
-numQualSuppPoints    = 4
+numQualSuppPoints    =  8
 
 min_qual = -1
 max_qual = 40
index 638f41e..e2dfa85 100644 (file)
@@ -72,7 +72,6 @@ class SIQP:
       totalQualSP = Configuration.totalQualSuppPoints
       numQPlifs = Configuration.numQualPlifs 
 
-
       #lengthGroupParam  = 5*1e-9
       #spliceGroupParam  = 1e-8
 
@@ -96,8 +95,8 @@ class SIQP:
          self.P[j,j]     += cfactor
          self.P[j+1,j+1] += cfactor
 
-      #cfactor = 1e-8
-      cfactor = 5e-9
+      cfactor = 1e-9
+      #cfactor = 5e-9
 
       for j in range(lengthSP,lengthSP+donSP-1):
          cfactor = spliceGroupParam
@@ -157,7 +156,7 @@ class SIQP:
       beg = lengthSP+donSP+accSP+mmatrixSP
       self.P[beg:-self.numExamples,beg:-self.numExamples]   *= qualityGroupParam
 
-      self.P[0:-self.numExamples,0:-self.numExamples]       *= regC
+      self.P[0:-self.numExamples,0:-self.numExamples]       *= regC*0.1
 
 
    def createRegularizer(self):
index 125c47a..e77b92e 100644 (file)
@@ -115,6 +115,58 @@ class SIQPSolver(SIQP):
       self.qmatbeg, self.qmatcnt, self.qmatind, self.qmatval = self.cpx_matrix(self.P)
       CPX.copyquad(self.env, self.lp, self.qmatbeg, self.qmatcnt, self.qmatind, self.qmatval)
 
+   def enforceMonotonicity(self, begin, end):
+      """
+      This method enforces monotonicity on the parameters indexed by begin to
+      end.
+      """
+      assert -1 < begin < end < self.numFeatures
+
+      for idx in xrange(begin,end):
+         self.enforcePairwiseMonotonicity(idx,idx+1)
+
+   def enforcePairwiseMonotonicity(self,pos1,pos2):
+      energy_deltas = IntMatrix([0.0]*self.numVariables).data 
+
+      energy_deltas[pos1] = -1.0
+      energy_deltas[pos2] = 1.0
+
+      numNewRows = 1
+      numNewCols = 0
+   
+      row_rhs = FloatMatrix([0.0]).data
+      row_sense = CharMatrix(['G']).data
+
+      row_matbeg = IntMatrix([0]).data
+
+      row_entries = [0.0]*self.numVariables
+      indices = [0]*self.numVariables
+
+      currentVal = 0.0
+      numNonZeroElems = 0
+      for idx in range(self.numFeatures):
+         currentVal = energy_deltas[idx]
+         if math.fabs(currentVal) > self.EPSILON:
+            row_entries[numNonZeroElems] = currentVal
+            indices[numNonZeroElems] = idx
+            numNonZeroElems += 1
+
+      row_matind = IntMatrix([0]*(numNonZeroElems+1)).data
+      row_matval = FloatMatrix([0.0]*(numNonZeroElems+1)).data
+
+      for pos in range(numNonZeroElems):
+         row_matind[pos] = indices[pos]
+         row_matval[pos] = row_entries[pos]
+
+      status = CPX.addrows (self.env, self.lp, numNewCols, numNewRows, numNonZeroElems+1, row_rhs,
+                                  row_sense, row_matbeg, row_matind, row_matval)
+      
+      assert status == 0, 'Error ocurred while adding constraint.'
+
+      self.addedConstraints += 1
+
+      return True
+
    def addConstraint(self, energy_deltas, example_idx):
    #def addConstraint(self, energy_deltas, loss, example_idx, useMarginRescaling):
       """
index 1cf51b2..bff6258 100644 (file)
@@ -40,6 +40,11 @@ from qpalma.Plif import Plf
 from qpalma.tools.splicesites import getDonAccScores
 from qpalma.Configuration import *
 
+from compile_dataset import compile_d
+
+import palma.palma_utils as pu
+from palma.output_formating import print_results
+
 class para:
    pass
 
@@ -51,11 +56,8 @@ class QPalma:
    def __init__(self):
       self.ARGS = Param()
 
-      #from compile_dataset import compile_d
       #compile_d(Conf.gff_fn,Conf.dna_flat_fn,Conf.filtered_fn,Conf.remapped_fn,Conf.tmp_dir,Conf.dataset_fn,False)
-
-      from compile_dataset import compile_d
-      compile_d(Conf.gff_fn,Conf.dna_flat_fn,Conf.filtered_fn,Conf.remapped_fn,Conf.tmp_dir,Conf.dataset_fn,True)
+      #compile_d(Conf.gff_fn,Conf.dna_flat_fn,Conf.filtered_fn,Conf.remapped_fn,Conf.tmp_dir,Conf.dataset_fn,True)
 
       #gen_file= '%s/genome.config' % self.ARGS.basedir
       #ginfo_filename = 'genome_info.pickle'
@@ -125,6 +127,8 @@ class QPalma:
       else:
          assert(False)
 
+      print 'leaving constructor...'
+
    def plog(self,string):
       self.logfh.write(string)
       self.logfh.flush()
@@ -163,12 +167,22 @@ class QPalma:
       # Set the parameters such as limits penalties for the Plifs
       [h,d,a,mmatrix,qualityPlifs] = set_param_palma(param,self.ARGS.train_with_intronlengthinformation)
 
-      pdb.set_trace()
+      #pdb.set_trace()
+
+      lengthSP    = Conf.numLengthSuppPoints
+      donSP       = Conf.numDonSuppPoints
+      accSP       = Conf.numAccSuppPoints
+      mmatrixSP   = Conf.sizeMatchmatrix[0]\
+      *Conf.sizeMatchmatrix[1]
+      numq        = Conf.numQualSuppPoints
+      totalQualSP = Conf.totalQualSuppPoints
 
       # Initialize solver 
       if Conf.USE_OPT:
          self.plog('Initializing problem...\n')
          solver = SIQPSolver(Conf.numFeatures,numExamples,Conf.C,self.logfh)
+         solver.enforceMonotonicity(lengthSP,lengthSP+donSP)
+         solver.enforceMonotonicity(lengthSP+donSP,lengthSP+donSP+accSP)
 
       # stores the number of alignments done for each example (best path, second-best path etc.)
       num_path = [anzpath]*numExamples
@@ -180,14 +194,6 @@ class QPalma:
       #############################################################################################
       self.plog('Starting training...\n')
 
-      donSP       = Conf.numDonSuppPoints
-      accSP       = Conf.numAccSuppPoints
-      lengthSP    = Conf.numLengthSuppPoints
-      mmatrixSP   = Conf.sizeMatchmatrix[0]\
-      *Conf.sizeMatchmatrix[1]
-      numq        = Conf.numQualSuppPoints
-      totalQualSP = Conf.totalQualSuppPoints
-
       currentPhi = zeros((Conf.numFeatures,1))
       totalQualityPenalties = zeros((totalQualSP,1))
 
@@ -397,12 +403,12 @@ class QPalma:
                AlignmentScores[pathNr+1] = (allWeights[:,pathNr+1].T * features)[0,0]
 
                # Check wether scalar product + loss equals viterbi score
-               #print 'Example nr.: %d, path nr. %d, scores: %f vs %f' % (exampleIdx,pathNr,newDPScores[pathNr,0], AlignmentScores[pathNr+1])
 
                distinct_scores = False
                if math.fabs(AlignmentScores[pathNr] - AlignmentScores[pathNr+1]) > 1e-5:
                   distinct_scores = True
                
+               # Check wether scalar product + loss equals viterbi score
                if not math.fabs(newDPScores[pathNr,0] - AlignmentScores[pathNr+1]) <= 1e-5:
                   pdb.set_trace()
 
@@ -441,7 +447,7 @@ class QPalma:
                #
 
             # call solver every nth example //added constraint
-            if exampleIdx != 0 and exampleIdx % 100 == 0 and Conf.USE_OPT:
+            if exampleIdx != 0 and exampleIdx % Conf.numConstraintsPerRound == 0 and Conf.USE_OPT:
                objValue,w,self.slacks = solver.solve()
 
                if math.fabs(objValue - self.oldObjValue) <= 1e-6:
@@ -456,6 +462,8 @@ class QPalma:
                sum_xis = 0
                for elem in self.slacks:
                   sum_xis +=  elem
+
+               print 'sum of slacks is %f'% sum_xis 
    
                for i in range(len(param)):
                   param[i] = w[i]
@@ -643,6 +651,12 @@ class QPalma:
          c_WeightMatch  = QPalmaDP.createIntArrayFromList([0]*(mm_len*1))
          c_DPScores     = QPalmaDP.createDoubleArrayFromList([.0]*1)
 
+         result_len = currentAlignment.getResultLength()
+         c_dna_array = QPalmaDP.createIntArrayFromList([0]*(result_len))
+         c_est_array = QPalmaDP.createIntArrayFromList([0]*(result_len))
+
+         currentAlignment.getAlignmentArrays(c_dna_array,c_est_array)
+
          c_qualityPlifsFeatures = QPalmaDP.createDoubleArrayFromList([.0]*(Conf.totalQualSuppPoints))
 
          currentAlignment.getAlignmentResults(c_SpliceAlign, c_EstAlign,\
@@ -680,6 +694,21 @@ class QPalma:
          true_map[0] = 1
          pathNr = 0
 
+         dna_array = [0.0]*result_len
+         est_array = [0.0]*result_len
+
+         for r_idx in range(result_len):
+            dna_array[r_idx] = c_dna_array[r_idx]
+            est_array[r_idx] = c_est_array[r_idx]
+
+         _newSpliceAlign = newSpliceAlign.flatten().tolist()[0]
+         _newEstAlign = newEstAlign.flatten().tolist()[0]
+
+         #line1,line2,line3 = pprint_alignment(_newSpliceAlign,_newEstAlign, dna_array, est_array)
+         #self.plog(line1+'\n')
+         #self.plog(line2+'\n')
+         #self.plog(line3+'\n')
+
          weightDon, weightAcc, weightIntron = computeSpliceWeights(d, a, h, newSpliceAlign.flatten().tolist()[0], don_supp, acc_supp)
 
          decodedQualityFeatures = zeros((Conf.totalQualSuppPoints,1))
@@ -700,7 +729,7 @@ class QPalma:
          if norm( allWeights[:,0] - allWeights[:,pathNr+1] ) < 1e-5:
             true_map[pathNr+1] = 1
 
-         e1_b_off,e1_e_off,e2_b_off,e2_e_off,newExons = evaluateExample(dna,est,exons,newSpliceAlign,newEstAlign,currentSplitPos)
+         e1_b_off,e1_e_off,e2_b_off,e2_e_off,newExons = self.evaluateExample(dna,est,exons,newSpliceAlign,newEstAlign,currentSplitPos,exampleIdx)
 
          if e1_b_off != None:
             exon1Begin.append(e1_b_off)
@@ -710,10 +739,32 @@ class QPalma:
          else:
             allWrongExons.append((newExons,exons))
 
+
+         logfile = self.logfh
+
+         if e1_b_off == 0 and e1_e_off == 0 and e2_b_off == 0 and e2_e_off == 0:
+            print >> logfile, 'example %d correct' % exampleIdx
+         else:
+            print >> logfile, 'example %d wrong' % exampleIdx
+
       e1Begin_pos,e1Begin_neg,e1End_pos,e1End_neg,mean_e1Begin_neg,mean_e1End_neg = self.evaluatePositions(exon1Begin,exon1End)
       e2Begin_pos,e2Begin_neg,e2End_pos,e2End_neg,mean_e2Begin_neg,mean_e2End_neg = self.evaluatePositions(exon2Begin,exon2End)
 
-      print 'Total num. of Examples: %d' % numExamples
+      all_pos_correct = 0
+      for idx in range(len(exon1Begin)):
+         if exon1Begin[idx] == 0 and exon1End[idx] == 0\
+         and exon2Begin[idx] == 0 and exon2End[idx] == 0:
+            all_pos_correct += 1
+
+      logfile = self.logfh
+      print >> logfile, 'Total num. of examples: %d' % numExamples
+      print >> logfile, 'Number of total correct examples: %d' % all_pos_correct
+      print >> logfile, 'Correct positions:\t\t%d\t%d\t%d\t%d' % (len(e1Begin_pos),len(e1End_pos),len(e2Begin_pos),len(e2End_pos))
+      print >> logfile, 'Incorrect positions:\t\t%d\t%d\t%d\t%d' % (len(e1Begin_neg),len(e1End_neg),len(e2Begin_neg),len(e2End_neg))
+      print >> logfile, 'Mean of pos. offset:\t\t%.2f\t%.2f\t%.2f\t%.2f' % (mean_e1Begin_neg,mean_e1End_neg,mean_e2Begin_neg,mean_e2End_neg)
+
+      print 'Total num. of examples: %d' % numExamples
+      print 'Number of total correct examples: %d' % all_pos_correct
       print 'Correct positions:\t\t%d\t%d\t%d\t%d' % (len(e1Begin_pos),len(e1End_pos),len(e2Begin_pos),len(e2End_pos))
       print 'Incorrect positions:\t\t%d\t%d\t%d\t%d' % (len(e1Begin_neg),len(e1End_neg),len(e2Begin_neg),len(e2End_neg))
       print 'Mean of pos. offset:\t\t%.2f\t%.2f\t%.2f\t%.2f' % (mean_e1Begin_neg,mean_e1End_neg,mean_e2Begin_neg,mean_e2End_neg)
@@ -722,6 +773,7 @@ class QPalma:
       self.logfh.close()
 
    def evaluatePositions(self,eBegin,eEnd):
+
       eBegin_pos = [elem for elem in eBegin if elem == 0]
       eBegin_neg = [elem for elem in eBegin if elem != 0]
       eEnd_pos = [elem for elem in eEnd if elem == 0]
@@ -747,6 +799,41 @@ class QPalma:
 
       return eBegin_pos,eBegin_neg,eEnd_pos,eEnd_neg,mean_eBegin_neg,mean_eEnd_neg
 
+   def evaluateExample(self,dna,est,exons,SpliceAlign,newEstAlign,spos,exampleIdx):
+      newExons = []
+      oldElem = -1
+      SpliceAlign = SpliceAlign.flatten().tolist()[0]
+      SpliceAlign.append(-1)
+      for pos,elem in enumerate(SpliceAlign):
+         if pos == 0:
+            oldElem = -1
+         else:
+            oldElem = SpliceAlign[pos-1]
+
+         if oldElem != 0 and elem == 0: # start of exon
+            newExons.append(pos)
+
+         if oldElem == 0 and elem != 0: # end of exon
+            newExons.append(pos)
+
+      e1_b_off,e1_e_off,e2_b_off,e2_e_off = 0,0,0,0
+
+      self.plog(("Example %d"%exampleIdx) + str(exons) + " "+  str(newExons) + "\n")
+      self.plog(("SpliceAlign " + str(SpliceAlign)+ "\n"))
+
+      if len(newExons) == 4:
+         e1_begin,e1_end = newExons[0],newExons[1]
+         e2_begin,e2_end = newExons[2],newExons[3]
+      else:
+         return None,None,None,None,newExons
+
+      e1_b_off = int(math.fabs(e1_begin - exons[0,0]))
+      e1_e_off = int(math.fabs(e1_end - exons[0,1]))
+
+      e2_b_off = int(math.fabs(e2_begin - exons[1,0]))
+      e2_e_off = int(math.fabs(e2_end - exons[1,1]))
+
+      return e1_b_off,e1_e_off,e2_b_off,e2_e_off,newExons
 
 def fetch_genome_info(ginfo_filename):
    if not os.path.exists(ginfo_filename):
@@ -768,38 +855,6 @@ def fetch_genome_info(ginfo_filename):
    else:
       return cPickle.load(open(ginfo_filename))
 
-def evaluateExample(dna,est,exons,SpliceAlign,newEstAlign,spos):
-   newExons = []
-   oldElem = -1
-   SpliceAlign = SpliceAlign.flatten().tolist()[0]
-   SpliceAlign.append(-1)
-   for pos,elem in enumerate(SpliceAlign):
-      if pos == 0:
-         oldElem = -1
-      else:
-         oldElem = SpliceAlign[pos-1]
-
-      if oldElem != 0 and elem == 0: # start of exon
-         newExons.append(pos)
-
-      if oldElem == 0 and elem != 0: # end of exon
-         newExons.append(pos)
-
-   e1_b_off,e1_e_off,e2_b_off,e2_e_off = 0,0,0,0
-
-   if len(newExons) == 4:
-      e1_begin,e1_end = newExons[0],newExons[1]
-      e2_begin,e2_end = newExons[2],newExons[3]
-   else:
-      return None,None,None,None,newExons
-
-   e1_b_off = int(math.fabs(e1_begin - exons[0,0]))
-   e1_e_off = int(math.fabs(e1_end - exons[0,1]))
-
-   e2_b_off = int(math.fabs(e2_begin - exons[1,0]))
-   e2_e_off = int(math.fabs(e2_end - exons[1,1]))
-
-   return e1_b_off,e1_e_off,e2_b_off,e2_e_off,newExons
 
 
 def calcStat(Acceptor,Donor,Exons):
@@ -848,6 +903,71 @@ def calcStat(Acceptor,Donor,Exons):
       maxDon = max(max(don),maxDon)
       minDon = min(min(don),minDon)
 
+def pprint_alignment(_newSpliceAlign,_newEstAlign, dna_array, est_array):
+   (qStart, qEnd, tStart, tEnd, num_exons, qExonSizes, qStarts, qEnds, tExonSizes, tStarts, tEnds) =\
+         pu.get_splice_info(_newSpliceAlign,_newEstAlign)
+
+   t_strand = '+'
+   translation = '-ACGTN_'    #how aligned est and dna sequence is displayed
+                              #(gap_char, 5 nucleotides, intron_char) 
+   comparison_char = '| '     #first: match_char, second: intron_char
+
+   (exon_length, identity, ssQuality, exonQuality, comparison, qIDX, tIDX) =\
+             pu.comp_identity(dna_array, est_array, t_strand, qStart, tStart, translation, comparison_char)
+
+   first_identity = None
+   last_identity = None 
+   for i in range(len(comparison)):
+    if comparison[i] == '|' and first_identity is None: 
+      first_identity = i 
+    if comparison[-i] == '|' and last_identity is None: 
+      last_identity = len(comparison) - i - 1
+
+   try:
+      for idx in range(len(dna_array)):
+         dna_array[idx] = translation[int(dna_array[idx])]
+         est_array[idx] = translation[int(est_array[idx])]
+   except:
+      pdb.set_trace()
+
+   line1 = "".join(dna_array)
+   line2 = comparison
+   line3 = "".join(est_array)
+
+   return line1,line2,line3
+
+   #exon_length[-1] = exon_length[-1] - tEnd + tIDX[last_identity] + 1
+
+   #tStart = tIDX[first_identity]
+   #tStarts[0] = tIDX[first_identity]
+   #tEnd = tIDX[last_identity]
+   #tEnds[-1] = tIDX[last_identity]
+   #qStart = qIDX[first_identity]
+   #qStarts[0] = qIDX[first_identity]
+   #qEnd = qIDX[last_identity]
+   #qEnds[-1] = qIDX[last_identity]
+
+   #align_info.qStart = qStart
+   #align_info.qEnd = qEnd
+   #align_info.tStart = tStart
+   #align_info.tEnd = tEnd
+   #align_info.num_exons = num_exons
+   #align_info.qExonSizes = qExonSizes
+   #align_info.qStarts = qStarts
+   #align_info.qEnds = qEnds
+   #align_info.tExonSizes =tExonSizes
+   #align_info.tStarts = tStarts
+   #align_info.tEnds = tEnds
+   #align_info.exon_length = exon_length
+   #align_info.identity = identity
+   #align_info.ssQuality = ssQuality
+   #align_info.exonQuality = exonQuality
+   #align_info.comparison = comparison
+   #align_info.qIDX = qIDX
+   #align_info.tIDX = tIDX 
+   #align_info.dna_letters = dna_letters
+   #align_info.est_letters = est_letters
+
 
 if __name__ == '__main__':
    qpalma = QPalma()
index 99c24ff..667000f 100644 (file)
@@ -69,8 +69,9 @@ def plot_param(filename,train_with_intronlengthinformation=1):
 
     # plot quality score transformations
     pylab.figure()
-    for plif in qualityPlifs:
-        pylab.plot(plif.limits,plif.penalties,'b^-',hold=True)
+    for pos,plif in enumerate(qualityPlifs):
+      if pos in [1,8,15,22]:
+         pylab.plot(plif.limits,plif.penalties,'b^-',hold=True)
     pylab.xlabel('quality value',fontsize=20)
     pylab.ylabel('transition score',fontsize=20)