+ set proper logging in optimizer code
authorFabio <fabio@congo.fml.local>
Tue, 21 Oct 2008 12:04:23 +0000 (14:04 +0200)
committerFabio <fabio@congo.fml.local>
Tue, 21 Oct 2008 12:04:23 +0000 (14:04 +0200)
+ cleaned up testcases a bit
+ minor modifications and cleanups

qpalma/DatasetUtils.py
qpalma/SIQP_CVXOPT.py
qpalma/SettingsParser.py
qpalma/qpalma_main.py
scripts/qpalma_pipeline.py
test.conf
tests/test_qpalma.py
tests/test_qpalma_installation.py

index 64d4d22..6c41062 100644 (file)
@@ -116,6 +116,8 @@ def generateTrainingDataset(settings):
       relative_exons = exons - up_cut
 
       assert checkExons(dna,relative_exons,readAlignment,id)
+
+      currentSeqInfo = (id,chromo)
       
       dataset.setdefault(id, []).append((currentSeqInfo,readAlignment,[prb],exons))
 
index 95260e9..75a14f3 100644 (file)
@@ -16,9 +16,6 @@ import pdb
 import cvxopt.base as cb
 import cvxopt.solvers as cs
 
-import logging
-logging.basicConfig(level=logging.DEBUG,format='%(levelname)s %(message)s')
-
 from SIQP import SIQP
 
 acc = 1e-7
@@ -34,6 +31,8 @@ cs.options['reltol']        = acc      # default: 1e-7
 cs.options['feastol']       = acc      # default: 1e-7
 #cs.options['refinement']    = True     # default: True 
 
+
+
 class SIQPSolver(SIQP):
    """
    This class is a wrapper for the cvxopt qp solver.
@@ -42,18 +41,26 @@ class SIQPSolver(SIQP):
    
    """
 
-   def __init__(self,fSize,numExamples,c,proto,settings):
+   def __init__(self,fSize,numExamples,c,logfh,settings):
       SIQP.__init__(self,fSize,numExamples,c,settings)
+
       self.numConstraints = 0
       self.solver_runs = 0
       self.old_objective_value = -(sys.maxint-1)
-      self.protocol = proto
+      self.logfh = logfh
 
       #self.P.tofile(open('matrix_P_%d_%d'%(self.P.size[0],self.P.size[1]),'w+'))
       #self.q.tofile(open('matrix_q_%d_%d'%(self.q.size[0],self.q.size[1]),'w+'))
       #self.G.tofile(open('matrix_G_%d_%d'%(self.G.size[0],self.G.size[1]),'w+'))
       #self.h.tofile(open('matrix_h_%d_%d'%(self.h.size[0],self.h.size[1]),'w+'))
 
+   def plog(self,string):
+      if self.logfh != None:
+         self.logfh.write(string)
+         self.logfh.flush()
+      else:
+         print string
+
    def addConstraint(self, energy_deltas, idx):
       energy_deltas = cb.matrix(energy_deltas)
       loss = 1.0
@@ -76,13 +83,12 @@ class SIQPSolver(SIQP):
          if useMarginRescaling:
              if scalar_prod[0] + old_slack > loss: # leave some room for inaccuracies
                  print 'Current example already fulfills constraint.'
-                 print >> self.protocol,  'Current example already fulfills constraint.'
+                 self.plog('Current example already fulfills constraint.\n')
                  return False
          else:
-             print >> self.protocol, "constraint at current solution: %f <= -1+%f/%f = %f" %(scalar_prod[0], old_slack, loss, -1 + old_slack/loss) 
+             self.plog("constraint at current solution: %f <= -1+%f/%f = %f" %(scalar_prod[0], old_slack, loss, -1 + old_slack/loss) )
              if scalar_prod[0] < -1+old_slack/loss + 1e-6: # leave some room for inaccuracies
-                 print >> self.protocol,  'Current example already fulfills constraint.'
-                 logging.debug('Current example already fulfills constraint.')
+                 self.plog('Current example already fulfills constraint.\n')
                  return False
 
       rows,cols = self.G.size
@@ -112,7 +118,7 @@ class SIQPSolver(SIQP):
       self.h = self.new_h
 
       self.numConstraints = self.G.size[0]
-      print >> self.protocol, 'number of constraints is %d' % self.numConstraints
+      self.plog('number of constraints is %d\n' % self.numConstraints)
 
       assert self.G.size[0] == self.h.size[0]
 
@@ -122,14 +128,18 @@ class SIQPSolver(SIQP):
       d = cs.qp(self.P,self.q,self.G,self.h)
       self.solver_runs += 1
 
-      print >> self.protocol, 'Return status of solver: ' + d['status']
+      self.plog('Return status of solver: ' + d['status'])
       new_w = d['x']
       self.old_w = new_w
-      print >> self.protocol, 'slacks are'
+      self.plog('slacks are:\n')
+
+      slacks_string = ''
       for i in range(self.numFeatures,new_w.size[0]):
-         print >> self.protocol, "%e " % new_w[i],
+         slacks_string += ('%e ' % new_w[i])
          assert new_w[i] >= 0, 'Error found a negative slack variable'
-      print >> self.protocol, "end of slacks"
+
+      self.plog(slacks_string+'\n')
+      self.plog('end of slacks\n')
 
       obj_value = 0.5 * new_w.T*self.P*new_w + self.q.T*new_w
       obj_value = obj_value[0]
@@ -138,10 +148,10 @@ class SIQPSolver(SIQP):
       regularizationPart = (0.5 *new_w.T*self.P*new_w)[0]
       lossPart = (self.q.T*new_w)[0]
       assert lossPart >= 0.0, 'Error we have a negative loss contribution'
-      print >> self.protocol, 'Parts of objective: %e %e'%(regularizationPart,lossPart)
+      self.plog('Parts of objective: %e %e\n'%(regularizationPart,lossPart))
 
       print 'SIQP_CVXOPT: Objective is: %e'%obj_value
-      print >> self.protocol, 'SIQP_CVXOPT: Objective is: %e'%obj_value
+      self.plog('SIQP_CVXOPT: Objective is: %e\n'%obj_value)
 
       print self.old_objective_value 
       print obj_value 
@@ -153,28 +163,3 @@ class SIQPSolver(SIQP):
       #assert ( scalar_prod <= new_w[self.numFeatures+self.currentIdx] )
 
       return obj_value,new_w[0:self.numFeatures,0],new_w[self.numFeatures:,0]
-
-def test():
-   fh = open('siqp_cvxopt.log','w+')
-   numFeatures = 3
-   numExamples = 2
-   s = SIQPSolver(numFeatures,numExamples,100.0,fh)
-
-   d1 = cb.matrix([1,0,2],(numFeatures,1))
-   d2 = cb.matrix([1,1,0],(numFeatures,1))
-
-   s.addConstraint(d1,10.0,0,True)
-   s.addConstraint(d2,105.0,1,True)
-
-   w,slacks = s.solve()
-   print w
-   print slacks
-
-   print w.T * d1 + slacks[0]
-   print w.T * d2 + slacks[1]
-
-   del s
-   fh.close()
-
-if __name__ == '__main__':
-   test()
index 6ff86ff..da4d91a 100644 (file)
@@ -94,7 +94,8 @@ def makeSettings(settings):
    # now convert and check all integer parameters
    for parameter in ['matchmatrixCols', 'matchmatrixRows', 'numAccSuppPoints', 'numConstraintsPerRound',\
    'numDonSuppPoints', 'numLengthSuppPoints', 'numQualPlifs', 'numQualSuppPoints', 'anzpath', 'iter_steps',\
-   'max_intron_len', 'max_qual', 'min_intron_len', 'min_qual', 'totalQualSuppPoints','C','num_splits','prb_offset','half_window_size']:
+   'max_intron_len', 'max_qual', 'min_intron_len', 'min_qual', 'totalQualSuppPoints','C','num_splits','prb_offset',\
+   'half_window_size']:
          
       try:
          settings[parameter] = int(settings[parameter])
@@ -112,6 +113,8 @@ def makeSettings(settings):
    settings['numFeatures'] = settings['numLengthSuppPoints'] + settings['numAccSuppPoints'] + settings['numDonSuppPoints']\
    + settings['matchmatrixCols']*settings['matchmatrixRows'] + settings['numQualPlifs']*settings['numQualSuppPoints']
 
+   settings['totalQualSuppPoints'] = settings['numQualPlifs']*settings['numQualSuppPoints']
+
    return settings
 
 
index 1564ad8..afc32d1 100644 (file)
@@ -17,7 +17,7 @@ from numpy.matlib import mat,zeros,ones,inf
 from numpy.linalg import norm
 
 #from qpalma.SIQP_CPX import SIQPSolver
-#from qpalma.SIQP_CVXOPT import SIQPSolver
+from qpalma.SIQP_CVXOPT import SIQPSolver
 
 import QPalmaDP
 import qpalma
@@ -55,6 +55,7 @@ def preprocessExample(training_set,exampleKey,seqInfo,settings):
 
    if exons.shape == (2,2):
       fetched_dna_subseq = dna[exons[0,0]:exons[0,1]] + dna[exons[1,0]:exons[1,1]]
+      print fetched_dna_subseq
      
       donor_elem = dna[exons[0,1]:exons[0,1]+2]
       acceptor_elem = dna[exons[1,0]-2:exons[1,0]]
@@ -84,7 +85,9 @@ def performAlignment(dna,read,quality,mmatrix,donor,acceptor,ps,qualityPlifs,cur
    matchmatrix = QPalmaDP.createDoubleArrayFromList(mmatrix.flatten().tolist()[0])
    mm_len = settings['matchmatrixRows']*settings['matchmatrixCols']
 
+   don_len = len(donor)
    donor = QPalmaDP.createDoubleArrayFromList(donor)
+   acc_len = len(acceptor)
    acceptor = QPalmaDP.createDoubleArrayFromList(acceptor)
 
    # Create the alignment object representing the interface to the C/C++ code.
@@ -94,8 +97,8 @@ def performAlignment(dna,read,quality,mmatrix,donor,acceptor,ps,qualityPlifs,cur
 
    # calculates SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest
    currentAlignment.myalign( current_num_path, dna, len(dna),\
-    read, len(read), prb, chastity, ps, matchmatrix, mm_len, donor, len(donor),\
-    acceptor, len(acceptor), c_qualityPlifs, settings['remove_duplicate_scores'],\
+    read, len(read), prb, chastity, ps, matchmatrix, mm_len, donor, don_len,\
+    acceptor, acc_len, c_qualityPlifs, settings['remove_duplicate_scores'],\
     settings['print_matrix'] )
 
    if prediction_mode:
@@ -124,11 +127,15 @@ class QPalma:
       self.ARGS = Param()
       self.qpalma_debug_mode = dmode
       self.seqInfo = seqInfo
+      self.logfh = None
 
 
    def plog(self,string):
-      self.logfh.write(string)
-      self.logfh.flush()
+      if self.logfh != None:
+         self.logfh.write(string)
+         self.logfh.flush()
+      else:
+         print string
 
 
    def init_training(self,dataset_fn,training_keys,settings,set_name):
@@ -172,11 +179,14 @@ class QPalma:
       totalQualSP = settings['totalQualSuppPoints']
 
       # calculate the total number of features
-      numFeatures = lengthSP+donSP+accSP+mmatrixSP*numq
+      numFeatures = settings['numFeatures']
 
       # Initialize parameter vector
       param = numpy.matlib.rand(numFeatures,1)
 
+      # we take the first quality vector of the tuple of quality vectors
+      quality_index = 0
+
       # no intron length model
       if not settings['enable_intron_length']:
          param[:lengthSP] *= 0.0
@@ -191,13 +201,14 @@ class QPalma:
          solver = SIQPSolver(numFeatures,numExamples,settings['C'],self.logfh,settings)
       except:
          self.plog('Got no license. Telling queue to reschedule job...\n')
+         self.plog(sys.exc_info())
          sys.exit(99)
 
-      solver.enforceMonotonicity(lengthSP,lengthSP+donSP)
-      solver.enforceMonotonicity(lengthSP+donSP,lengthSP+donSP+accSP)
+      #solver.enforceMonotonicity(lengthSP,lengthSP+donSP)
+      #solver.enforceMonotonicity(lengthSP+donSP,lengthSP+donSP+accSP)
 
       # stores the number of alignments done for each example (best path, second-best path etc.)
-      num_path = settings['anzpath']*numExamples
+      num_path = [settings['anzpath']]*numExamples
 
       currentPhi = zeros((numFeatures,1))
       totalQualityPenalties = zeros((totalQualSP,1))
@@ -218,11 +229,12 @@ class QPalma:
          if iteration_nr == settings['iter_steps']:
             break
 
+         #print 'Got %d examples' % len(training_set.keys())
          for exampleIdx,example_key in enumerate(training_set.keys()):
             print 'Current example %d' % example_key
 
             try:
-               dna,est,acc_supp,don_supp,exons,original_est,currentQualities =\
+               dna,read,acc_supp,don_supp,exons,original_read,currentQualities =\
                preprocessExample(training_set,example_key,self.seqInfo,settings)
             except SpliceSiteException:
                continue
@@ -244,7 +256,7 @@ class QPalma:
             # Berechne die Parameter des wirklichen Alignments (but with untrained d,a,h ...)    
             if settings['enable_quality_scores']:
                trueSpliceAlign, trueWeightMatch, trueWeightQuality ,dna_calc =\
-               computeSpliceAlignWithQuality(dna, exons, est, original_est,\
+               computeSpliceAlignWithQuality(dna, exons, read, original_read,\
                quality, qualityPlifs,settings)
             else:
                trueSpliceAlign, trueWeightMatch, trueWeightQuality = computeSpliceAlignWithQuality(dna, exons)
@@ -287,13 +299,14 @@ class QPalma:
 
             newSpliceAlign, newEstAlign, newWeightMatch, newDPScores,\
             newQualityPlifsFeatures, unneeded1, unneeded2 =\
-            performAlignment(dna,est,quality,mmatrix,donor,acceptor,ps,qualityPlifs,num_path[exampleIdx],False,settings)
+            performAlignment(dna,read,quality,mmatrix,donor,acceptor,ps,qualityPlifs,num_path[exampleIdx],False,settings)
             mm_len = settings['matchmatrixRows']*settings['matchmatrixCols']
 
-            newSpliceAlign = newSpliceAlign.reshape(num_path[exampleIdx],len(dna))
-            newWeightMatch = newWeightMatch.reshape(num_path[exampleIdx],mm_len)
+            # check for correct reshaping !!
+            newSpliceAlign = numpy.mat(newSpliceAlign).reshape(num_path[exampleIdx],len(dna))
+            newWeightMatch = numpy.mat(newWeightMatch).reshape(num_path[exampleIdx],mm_len)
 
-            newQualityPlifsFeatures = newQualityPlifsFeatures.reshape(num_path[exampleIdx],settings['totalQualSuppPoints'])
+            newQualityPlifsFeatures = numpy.mat(newQualityPlifsFeatures).reshape(num_path[exampleIdx],settings['totalQualSuppPoints'])
             # Calculate weights of the respective alignments. Note that we are calculating n-best alignments without 
             # hamming loss, so we have to keep track which of the n-best alignments correspond to the true one in order 
             # not to incorporate a true alignment in the
@@ -327,12 +340,12 @@ class QPalma:
                   distinct_scores = True
 
                # Check wether scalar product + loss equals viterbi score
-               if not math.fabs(newDPScores[pathNr,0] - AlignmentScores[pathNr+1]) <= 1e-5:
+               if not math.fabs(newDPScores[pathNr] - AlignmentScores[pathNr+1]) <= 1e-5:
                   self.plog("Scalar prod. + loss not equals Viterbi output!\n")
                   pdb.set_trace()
 
                self.plog(" scalar prod (correct) : %f\n"%AlignmentScores[0])
-               self.plog(" scalar prod (pred.) : %f %f\n"%(newDPScores[pathNr,0],AlignmentScores[pathNr+1]))
+               self.plog(" scalar prod (pred.) : %f %f\n"%(newDPScores[pathNr],AlignmentScores[pathNr+1]))
 
                # if the pathNr-best alignment is very close to the true alignment consider it as true
                if norm( allWeights[:,0] - allWeights[:,pathNr+1] ) < 1e-5:
@@ -383,7 +396,7 @@ class QPalma:
                # end of one example processing 
 
             # call solver every nth example / added constraint
-            if exampleIdx != 0 and exampleIdx % numConstPerRound == 0:
+            #if exampleIdx != 0 and exampleIdx % numConstPerRound == 0:
                objValue,w,self.slacks = solver.solve()
                solver_call_ctr += 1
 
@@ -419,20 +432,21 @@ class QPalma:
          self.plog("suboptimal rounds %d\n" %suboptimal_example)
 
          if self.noImprovementCtr == numExamples*2:
-            FinalizeTraining(param,'param_%d.pickle'%param_idx)
+            self.FinalizeTraining(param,'param_%d.pickle'%param_idx)
 
          iteration_nr += 1
 
       #
       # end of optimization 
       #  
-      FinalizeTraining(param,'param_%d.pickle'%param_idx)
+      self.FinalizeTraining(param,'param_%d.pickle'%param_idx)
 
 
-   def FinalizeTraining(self,vector,name):
+   def FinalizeTraining(self,param,name):
       self.plog("Training completed")
       cPickle.dump(param,open(name,'w+'))
-      self.logfh.close()
+      if self.logfh != None:
+         self.logfh.close()
    
 
 ###############################################################################
index bcb2a12..b53c7f4 100644 (file)
@@ -153,3 +153,7 @@ if __name__ == '__main__':
       system_obj.training()
    else:
       assert False
+
+   #spliced_reads_fn 
+   #unspliced_reads_fn 
+   #prediction_param_fn
index 6be32a6..9017c22 100644 (file)
--- a/test.conf
+++ b/test.conf
@@ -125,4 +125,3 @@ min_intron_len          = 20
 min_qual                = -5
 min_svm_score           = 0.0
 print_matrix            = False
-totalQualSuppPoints     = 300
index ed11832..2369f19 100644 (file)
@@ -17,56 +17,172 @@ import math
 import numpy
 from numpy import inf
 import os.path
+import shutil
 import pdb
 import array
 import unittest
 
-from qpalma.qpalma_main import QPalma
+from qpalma.qpalma_main import QPalma,preprocessExample
 from qpalma.utils import print_prediction
 
 from qpalma.Run import Run
 
 from qpalma.SettingsParser import parseSettings
+from qpalma.DatasetUtils import processQuality
 from qpalma.OutputFormat import alignment_reconstruct
-from qpalma.sequence_utils import DataAccessWrapper,SeqSpliceInfo,reverse_complement
+from qpalma.sequence_utils import DataAccessWrapper,SeqSpliceInfo
 
 from qpalma.Lookup import LookupTable
 
 jp = os.path.join
 
 
+def createTrainingSet():
+
+   dataset = {}
+
+   id       = 1111
+   chromo   = 1
+   strand   = '+'
+   up_cut   = 117
+   down_cut = 383
+
+   currentSeqInfo = (id,chromo,strand,up_cut,down_cut)
+
+   # tcaatacaaatcctatttcttgtggttttctttccttcacttagctatggatggtttatcttcattt
+   # tcaatacaaatcctatttctt                       ctatggatggtttatcttcattt
+
+   originalRead      = 'tcaatacaaatcctatttcttctatggatggtttatcttcattt'
+   raw_qualities     = 'h'*len(originalRead)
+   prb_offset        = 64
+   quality_interval  = (-5,40)
+   perform_checks    = True
+   quality           = processQuality(raw_qualities,prb_offset,quality_interval,perform_checks)
+   currentQualities  = [quality]
+
+   exons      = numpy.mat([217,238,261,284]).reshape((2,2))
+
+   dataset[id] = (currentSeqInfo,originalRead,currentQualities,exons)
+
+   id       = 2222
+   up_cut   = 1560
+   down_cut = 1901
+
+   currentSeqInfo = (id,chromo,strand,up_cut,down_cut)
+
+   # tcctttaagattttgtttttataatgtgttcttccatccacatctatctccatatgatatggaccatatcatacatcatcatttgtccaaatgcatgaatgaatttggaaataggtacgagaatgccaacaatgacaagaa
+   # tcctttaagattttgtttttataatgt                                                                                       gtacgagaatgccaacaatgacaagaa
+
+   originalRead      = 'tcctttaagattttgtttttataatgtgtacgagaatgccaacaatgacaagaa'
+   raw_qualities     = 'h'*len(originalRead)
+   prb_offset        = 64
+   quality_interval  = (-5,40)
+   perform_checks    = True
+   quality           = processQuality(raw_qualities,prb_offset,quality_interval,perform_checks)
+   currentQualities  = [quality]
+
+   exons      = numpy.mat([1660,1687,1774,1801]).reshape((2,2))
+
+   #dataset[id] = (currentSeqInfo,originalRead,currentQualities,exons)
+
+   return dataset
+
+
+def createPredictionSet():
+
+   dataset = {}
+
+   id       = 1111
+   chromo   = 1
+   strand   = '+'
+   up_cut   = 117
+   down_cut = 383
+
+   currentSeqInfo = (id,chromo,strand,up_cut,down_cut)
+
+   # tcaatacaaatcctatttcttgtggttttctttccttcacttagctatggatggtttatcttcattt
+   # tcaatacaaatcctatttctt                       ctatggatggtttatcttcattt
+
+   originalRead      = 'tcaatacaaatcctatttcttctatggatggtttatcttcattt'
+   raw_qualities     = 'h'*len(originalRead)
+   prb_offset        = 64
+   quality_interval  = (-5,40)
+   perform_checks    = True
+   quality           = processQuality(raw_qualities,prb_offset,quality_interval,perform_checks)
+   currentQualities  = [quality]
+
+   exons      = numpy.mat([217,238,261,284]).reshape((2,2))
+
+   dataset.setdefault(id, []).append((currentSeqInfo,originalRead,currentQualities))
+
+   id       = 2222
+   up_cut   = 1560
+   down_cut = 1901
+
+   currentSeqInfo = (id,chromo,strand,up_cut,down_cut)
+
+   # tcctttaagattttgtttttataatgtgttcttccatccacatctatctccatatgatatggaccatatcatacatcatcatttgtccaaatgcatgaatgaatttggaaataggtacgagaatgccaacaatgacaagaa
+   # tcctttaagattttgtttttataatgt                                                                                       gtacgagaatgccaacaatgacaagaa
+
+   originalRead      = 'tcctttaagattttgtttttataatgtgtacgagaatgccaacaatgacaagaa'
+   raw_qualities     = 'h'*len(originalRead)
+   prb_offset        = 64
+   quality_interval  = (-5,40)
+   perform_checks    = True
+   quality           = processQuality(raw_qualities,prb_offset,quality_interval,perform_checks)
+   currentQualities  = [quality]
+
+   exons      = numpy.mat([1660,1687,1774,1801]).reshape((2,2))
+
+   dataset.setdefault(id, []).append((currentSeqInfo,originalRead,currentQualities))
+
+
+   return dataset
+
+
 class TestQPalmaTraining(unittest.TestCase):
+   """
+   """
 
    def setUp(self):
+      dir = '/fml/ag-raetsch/home/fabio/tmp/sandbox/testcases'      
+      if os.path.exists(dir):
+         shutil.rmtree(dir)
+      os.mkdir(dir)
+
+      self.settings = parseSettings('train_testcase.conf')
+
+      accessWrapper = DataAccessWrapper(self.settings)
+      self.seqInfo = SeqSpliceInfo(accessWrapper,self.settings['allowed_fragments'])
 
-      self.settings = parseSettings('testcase.conf')
+      self.training_set = createTrainingSet()
 
 
    def testInitialization(self):
       pass
 
 
-   def test_preprocessExample(self):
-   
-      currentSeqInfo = ()
-      originalRead   = 
-      currentQualities = 
-      currentExons   = 
+   def _test_preprocessExample(self):
+      """
+      """
 
-      training_set[1] = (currentSeqInfo,originalRead,currentQualities,currentExons)
+      id = 2222
+      (currentSeqInfo,originalRead,currentQualities,exons) = self.training_set[id] 
+      print originalRead
 
-      dna,read,acc_supp,don_supp,exons,originalRead,currentQualities =\
-      preprocessExample(training_set,1,seqInfo,settings)
+      try:
+         preprocessExample(self.training_set,id,self.seqInfo,self.settings)
+      except:
+         print sys.exc_info()
 
 
-   def test_performAlignment(self):
+   def _test_performAlignment(self):
       """
       """
       #performAlignment(dna,read,quality,mmatrix,donor,acceptor,ps,qualityPlifs,current_num_path,prediction_mode,settings)
       pass
 
 
-
    def test_train(self):
       """
       This function
@@ -74,7 +190,7 @@ class TestQPalmaTraining(unittest.TestCase):
 
       set_name = 'test_train'
       
-      qp = QPalma(seqInfo,True)
+      qp = QPalma(self.seqInfo,True)
       qp.train(self.training_set,self.settings,set_name)
 
 
@@ -85,49 +201,89 @@ class TestQPalmaPrediction(unittest.TestCase):
    """
 
       
-   def _setUp(self):
-      self.prediction_set = {}
+   def setUp(self):
+      """
+      """
+
+      dir = '/fml/ag-raetsch/home/fabio/tmp/sandbox/testcases'      
+      if os.path.exists(dir):
+         shutil.rmtree(dir)
+      os.mkdir(dir)
+
+      self.settings = parseSettings('train_testcase.conf')
+
+      accessWrapper = DataAccessWrapper(self.settings)
+      self.seqInfo = SeqSpliceInfo(accessWrapper,self.settings['allowed_fragments'])
+
+      self.prediction_set = createPredictionSet()
+
+
+   def test_predictionFromTrainedParameters(self):
+      self.prediction_set
+
+      qp = QPalma(self.seqInfo,True)
+
+      self.settings['prediction_param_fn'] = 'param_0.pickle'
+
+      allPredictions = qp.predict(self.prediction_set,self.settings)
+
+      for current_prediction in allPredictions:
+         align_str = print_prediction(current_prediction)
+         print align_str
+
+         id          = current_prediction['id']
+         seq         = current_prediction['read']
+         dna         = current_prediction['dna']
+         chromo      = current_prediction['chr']
+         strand      = current_prediction['strand']
+         start_pos   = current_prediction['start_pos']
+         predExons   = current_prediction['predExons']
 
-      # chr1 +  20-120
-      read  = 'catctatgcaacagcattacagtgatcaccggcccaaaaaacctgtgtctggggttttgcctgatgatagcagtgatactgaaactggatcaatggtaag'
-      currentQualities = [[40]*len(read)]
+         numExons = int(math.ceil(len(predExons) / 2))
+         
+         print alignment_reconstruct(current_prediction,numExons)
+         print id,start_pos,predExons
 
-      id       = 3
-      chromo   = 1
-      strand   = '+'
+      ## chr1 +  20-120
+      #read  = 'catctatgcaacagcattacagtgatcaccggcccaaaaaacctgtgtctggggttttgcctgatgatagcagtgatactgaaactggatcaatggtaag'
+      #currentQualities = [[40]*len(read)]
 
-      genomicSeq_start  = 3500
-      genomicSeq_stop   = 6500
-      print 'Position: ',
-      print genomicSeq_start,genomicSeq_stop 
+      #id       = 3
+      #chromo   = 1
+      #strand   = '+'
 
-      currentSeqInfo = id,chromo,strand,genomicSeq_start,genomicSeq_stop
+      #genomicSeq_start  = 3500
+      #genomicSeq_stop   = 6500
+      #print 'Position: ',
+      #print genomicSeq_start,genomicSeq_stop 
 
-      example = (currentSeqInfo,read,currentQualities)
-      self.prediction_set[id] = [example]
+      #currentSeqInfo = id,chromo,strand,genomicSeq_start,genomicSeq_stop
 
-      # chr1 - 5000-5100
-      read  = 'ctgtgtatctggttgctcaatatgctcgccggaaaatgaagatcatggatgctgtgagttctccttattgttcattatcaaactgatatgagtttctgat'
-      currentQualities = [[40]*len(read)]
+      #example = (currentSeqInfo,read,currentQualities)
+      #self.prediction_set[id] = [example]
 
-      id       = 4
-      chromo   = 1
-      strand   = '-'
+      ## chr1 - 5000-5100
+      #read  = 'ctgtgtatctggttgctcaatatgctcgccggaaaatgaagatcatggatgctgtgagttctccttattgttcattatcaaactgatatgagtttctgat'
+      #currentQualities = [[40]*len(read)]
 
-      total_size = 30432563
+      #id       = 4
+      #chromo   = 1
+      #strand   = '-'
 
-      genomicSeq_start  = total_size - 6500
-      genomicSeq_stop   = total_size - 3500
-      print 'Position: ',
-      print genomicSeq_start,genomicSeq_stop 
+      #total_size = 30432563
 
-      currentSeqInfo = id,chromo,strand,genomicSeq_start,genomicSeq_stop
+      #genomicSeq_start  = total_size - 6500
+      #genomicSeq_stop   = total_size - 3500
+      #print 'Position: ',
+      #print genomicSeq_start,genomicSeq_stop 
 
-      example = (currentSeqInfo,read,currentQualities)
-      self.prediction_set[id] = [example]
+      #currentSeqInfo = id,chromo,strand,genomicSeq_start,genomicSeq_stop
 
+      #example = (currentSeqInfo,read,currentQualities)
+      #self.prediction_set[id] = [example]
 
-   def testAlignments(self):
+
+   def _testAlignments(self):
 
       settings = parseSettings('testcase.conf')
 
@@ -166,6 +322,8 @@ class TestQPalmaPrediction(unittest.TestCase):
       print 'Problem counter is %d' % qp.problem_ctr 
 
 
+
+
    def _testAlignments(self):
       run_dir = '/fml/ag-raetsch/home/fabio/tmp/newest_run/alignment/saved_run'
 
@@ -208,7 +366,7 @@ class TestQPalmaPrediction(unittest.TestCase):
          align_str = print_prediction(current_prediction)
          print align_str
 
-         id = current_prediction['id']
+         id          = current_prediction['id']
          seq         = current_prediction['read']
          dna         = current_prediction['dna']
          chromo      = current_prediction['chr']
@@ -225,6 +383,7 @@ class TestQPalmaPrediction(unittest.TestCase):
 
 
 if __name__ == '__main__':
-   suite = unittest.TestLoader().loadTestsFromTestCase(TestQPalmaTraining)
-   suite = unittest.TestLoader().loadTestsFromTestCase(TestQPalmaPrediction)
-   unittest.TextTestRunner(verbosity=2).run(suite)
+   train_suite = unittest.TestLoader().loadTestsFromTestCase(TestQPalmaTraining)
+   predict_suite = unittest.TestLoader().loadTestsFromTestCase(TestQPalmaPrediction)
+   all_suites = unittest.TestSuite([train_suite, predict_suite])
+   unittest.TextTestRunner(verbosity=2).run(all_suites)
index 63b8930..8e2fc03 100644 (file)
@@ -67,12 +67,23 @@ if __name__ == '__main__':
 
    print '\nChecking for optional modules...\n'
 
+
+   solver_installed = False
+
    module_list = ['pycplex', 'cvxopt', 'pymosek']
 
    for mod in module_list:
-      line = 'Status of module %s:\t%s'%(mod,str(check_for_module(mod)))
+      status = check_for_module(mod)
+      solver_installed = solver_installed or status
+      line = 'Status of module %s:\t%s'%(mod,str(status))
       print line
       out_fh.write(line+'\n')
+   
+   if not solver_installed:
+      print '\n'+'#'*80
+      print '\nNOTE: There are no solvers installed!'
+      print 'You will only be able to use QPalma in \'predict\' mode.\n'
+      print '#'*80
 
    # after checking the modules we run some simple testcases on QPalma.
    #data_suite = unittest.TestLoader().loadTestsFromTestCase(TestSequenceUtils)