+ update makefiles to fetch automatically valid Python includes and libs
[qpalma.git] / qpalma / qpalma_main.py
index 1564ad8..daf1975 100644 (file)
@@ -17,7 +17,7 @@ from numpy.matlib import mat,zeros,ones,inf
 from numpy.linalg import norm
 
 #from qpalma.SIQP_CPX import SIQPSolver
-#from qpalma.SIQP_CVXOPT import SIQPSolver
+from qpalma.SIQP_CVXOPT import SIQPSolver
 
 import QPalmaDP
 import qpalma
@@ -55,6 +55,7 @@ def preprocessExample(training_set,exampleKey,seqInfo,settings):
 
    if exons.shape == (2,2):
       fetched_dna_subseq = dna[exons[0,0]:exons[0,1]] + dna[exons[1,0]:exons[1,1]]
+      print fetched_dna_subseq
      
       donor_elem = dna[exons[0,1]:exons[0,1]+2]
       acceptor_elem = dna[exons[1,0]-2:exons[1,0]]
@@ -84,7 +85,9 @@ def performAlignment(dna,read,quality,mmatrix,donor,acceptor,ps,qualityPlifs,cur
    matchmatrix = QPalmaDP.createDoubleArrayFromList(mmatrix.flatten().tolist()[0])
    mm_len = settings['matchmatrixRows']*settings['matchmatrixCols']
 
+   don_len = len(donor)
    donor = QPalmaDP.createDoubleArrayFromList(donor)
+   acc_len = len(acceptor)
    acceptor = QPalmaDP.createDoubleArrayFromList(acceptor)
 
    # Create the alignment object representing the interface to the C/C++ code.
@@ -94,8 +97,8 @@ def performAlignment(dna,read,quality,mmatrix,donor,acceptor,ps,qualityPlifs,cur
 
    # calculates SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest
    currentAlignment.myalign( current_num_path, dna, len(dna),\
-    read, len(read), prb, chastity, ps, matchmatrix, mm_len, donor, len(donor),\
-    acceptor, len(acceptor), c_qualityPlifs, settings['remove_duplicate_scores'],\
+    read, len(read), prb, chastity, ps, matchmatrix, mm_len, donor, don_len,\
+    acceptor, acc_len, c_qualityPlifs, settings['remove_duplicate_scores'],\
     settings['print_matrix'] )
 
    if prediction_mode:
@@ -124,15 +127,19 @@ class QPalma:
       self.ARGS = Param()
       self.qpalma_debug_mode = dmode
       self.seqInfo = seqInfo
+      self.logfh = None
 
 
    def plog(self,string):
-      self.logfh.write(string)
-      self.logfh.flush()
+      if self.logfh != None:
+         self.logfh.write(string)
+         self.logfh.flush()
+      else:
+         print string
 
 
-   def init_training(self,dataset_fn,training_keys,settings,set_name):
-      full_working_path = jp(settings['training_dir'],run_name)
+   def init_training(self,dataset_fn,settings,set_name):
+      full_working_path = jp(settings['training_dir'],set_name)
 
       #assert not os.path.exists(full_working_path)
       if not os.path.exists(full_working_path):
@@ -172,11 +179,14 @@ class QPalma:
       totalQualSP = settings['totalQualSuppPoints']
 
       # calculate the total number of features
-      numFeatures = lengthSP+donSP+accSP+mmatrixSP*numq
+      numFeatures = settings['numFeatures']
 
       # Initialize parameter vector
       param = numpy.matlib.rand(numFeatures,1)
 
+      # we take the first quality vector of the tuple of quality vectors
+      quality_index = 0
+
       # no intron length model
       if not settings['enable_intron_length']:
          param[:lengthSP] *= 0.0
@@ -191,13 +201,14 @@ class QPalma:
          solver = SIQPSolver(numFeatures,numExamples,settings['C'],self.logfh,settings)
       except:
          self.plog('Got no license. Telling queue to reschedule job...\n')
+         self.plog(sys.exc_info())
          sys.exit(99)
 
-      solver.enforceMonotonicity(lengthSP,lengthSP+donSP)
-      solver.enforceMonotonicity(lengthSP+donSP,lengthSP+donSP+accSP)
+      #solver.enforceMonotonicity(lengthSP,lengthSP+donSP)
+      #solver.enforceMonotonicity(lengthSP+donSP,lengthSP+donSP+accSP)
 
       # stores the number of alignments done for each example (best path, second-best path etc.)
-      num_path = settings['anzpath']*numExamples
+      num_path = [settings['anzpath']]*numExamples
 
       currentPhi = zeros((numFeatures,1))
       totalQualityPenalties = zeros((totalQualSP,1))
@@ -218,11 +229,12 @@ class QPalma:
          if iteration_nr == settings['iter_steps']:
             break
 
+         #print 'Got %d examples' % len(training_set.keys())
          for exampleIdx,example_key in enumerate(training_set.keys()):
             print 'Current example %d' % example_key
 
             try:
-               dna,est,acc_supp,don_supp,exons,original_est,currentQualities =\
+               dna,read,acc_supp,don_supp,exons,original_read,currentQualities =\
                preprocessExample(training_set,example_key,self.seqInfo,settings)
             except SpliceSiteException:
                continue
@@ -244,13 +256,14 @@ class QPalma:
             # Berechne die Parameter des wirklichen Alignments (but with untrained d,a,h ...)    
             if settings['enable_quality_scores']:
                trueSpliceAlign, trueWeightMatch, trueWeightQuality ,dna_calc =\
-               computeSpliceAlignWithQuality(dna, exons, est, original_est,\
+               computeSpliceAlignWithQuality(dna, exons, read, original_read,\
                quality, qualityPlifs,settings)
             else:
                trueSpliceAlign, trueWeightMatch, trueWeightQuality = computeSpliceAlignWithQuality(dna, exons)
 
             dna_calc = dna_calc.replace('-','')
 
+
             #print 'right before computeSpliceWeights exampleIdx %d' % exampleIdx
             # Calculate the weights
             trueWeightDon, trueWeightAcc, trueWeightIntron =\
@@ -285,15 +298,16 @@ class QPalma:
 
             ps = h.convert2SWIG()
 
-            newSpliceAlign, newEstAlign, newWeightMatch, newDPScores,\
-            newQualityPlifsFeatures, unneeded1, unneeded2 =\
-            performAlignment(dna,est,quality,mmatrix,donor,acceptor,ps,qualityPlifs,num_path[exampleIdx],False,settings)
+            newSpliceAlign, newEstAlign, newWeightMatch, newDPScores, newQualityPlifsFeatures, unused1, unused2 =\
+            performAlignment(dna,read,quality,mmatrix,donor,acceptor,ps,qualityPlifs,num_path[exampleIdx],False,settings)
+
             mm_len = settings['matchmatrixRows']*settings['matchmatrixCols']
 
-            newSpliceAlign = newSpliceAlign.reshape(num_path[exampleIdx],len(dna))
-            newWeightMatch = newWeightMatch.reshape(num_path[exampleIdx],mm_len)
+            # check for correct reshaping !!
+            newSpliceAlign = numpy.mat(newSpliceAlign).reshape(num_path[exampleIdx],len(dna))
+            newWeightMatch = numpy.mat(newWeightMatch).reshape(num_path[exampleIdx],mm_len)
 
-            newQualityPlifsFeatures = newQualityPlifsFeatures.reshape(num_path[exampleIdx],settings['totalQualSuppPoints'])
+            newQualityPlifsFeatures = numpy.mat(newQualityPlifsFeatures).reshape(num_path[exampleIdx],settings['totalQualSuppPoints'])
             # Calculate weights of the respective alignments. Note that we are calculating n-best alignments without 
             # hamming loss, so we have to keep track which of the n-best alignments correspond to the true one in order 
             # not to incorporate a true alignment in the
@@ -327,12 +341,12 @@ class QPalma:
                   distinct_scores = True
 
                # Check wether scalar product + loss equals viterbi score
-               if not math.fabs(newDPScores[pathNr,0] - AlignmentScores[pathNr+1]) <= 1e-5:
+               if not math.fabs(newDPScores[pathNr] - AlignmentScores[pathNr+1]) <= 1e-5:
                   self.plog("Scalar prod. + loss not equals Viterbi output!\n")
                   pdb.set_trace()
 
                self.plog(" scalar prod (correct) : %f\n"%AlignmentScores[0])
-               self.plog(" scalar prod (pred.) : %f %f\n"%(newDPScores[pathNr,0],AlignmentScores[pathNr+1]))
+               self.plog(" scalar prod (pred.) : %f %f\n"%(newDPScores[pathNr],AlignmentScores[pathNr+1]))
 
                # if the pathNr-best alignment is very close to the true alignment consider it as true
                if norm( allWeights[:,0] - allWeights[:,pathNr+1] ) < 1e-5:
@@ -362,11 +376,7 @@ class QPalma:
 
                if False:
                   self.plog("Is considered as: %d\n" % true_map[1])
-
-                  #result_len = currentAlignment.getResultLength()
-
                   dna_array,est_array = currentAlignment.getAlignmentArraysNew()
-
                   _newSpliceAlign = newSpliceAlign[0].flatten().tolist()[0]
                   _newEstAlign = newEstAlign[0].flatten().tolist()[0]
 
@@ -383,7 +393,8 @@ class QPalma:
                # end of one example processing 
 
             # call solver every nth example / added constraint
-            if exampleIdx != 0 and exampleIdx % numConstPerRound == 0:
+            #if exampleIdx != 0 and exampleIdx % numConstPerRound == 0:
+            if True:
                objValue,w,self.slacks = solver.solve()
                solver_call_ctr += 1
 
@@ -419,20 +430,21 @@ class QPalma:
          self.plog("suboptimal rounds %d\n" %suboptimal_example)
 
          if self.noImprovementCtr == numExamples*2:
-            FinalizeTraining(param,'param_%d.pickle'%param_idx)
+            self.FinalizeTraining(param,'param_%d.pickle'%param_idx)
 
          iteration_nr += 1
 
       #
       # end of optimization 
       #  
-      FinalizeTraining(param,'param_%d.pickle'%param_idx)
+      self.FinalizeTraining(param,'param_%d.pickle'%param_idx)
 
 
-   def FinalizeTraining(self,vector,name):
+   def FinalizeTraining(self,param,name):
       self.plog("Training completed")
       cPickle.dump(param,open(name,'w+'))
-      self.logfh.close()
+      if self.logfh != None:
+         self.logfh.close()
    
 
 ###############################################################################