+ added prediction script with evaluation functions to test QPalma
authorfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Mon, 21 Jan 2008 11:29:42 +0000 (11:29 +0000)
committerfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Mon, 21 Jan 2008 11:29:42 +0000 (11:29 +0000)
+ changed parameter storage
+ removed obsolete script

git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@7509 e1793c9e-67f9-0310-80fc-b846ff1f7b36

python/Configuration.py
python/computeSpliceAlign.py [deleted file]
python/export_param.py
python/qpalma.py
python/qpalma_predict.py [new file with mode: 0644]

index 34fcb74..de579c2 100644 (file)
@@ -173,7 +173,7 @@ totalQualSuppPoints = numQualPlifs*numQualSuppPoints
 numFeatures = numDonSuppPoints + numAccSuppPoints\
 + numLengthSuppPoints + sizeMatchmatrix[0]*sizeMatchmatrix[1] + totalQualSuppPoints 
 
-iter_steps = 10
+iter_steps = 1
 remove_duplicate_scores = False
 print_matrix            = False
 anzpath                 = 2
diff --git a/python/computeSpliceAlign.py b/python/computeSpliceAlign.py
deleted file mode 100644 (file)
index a6d4a49..0000000
+++ /dev/null
@@ -1,87 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-from numpy.matlib import zeros
-import pdb
-
-def  computeSpliceAlign(dna, exons):
-   """
-   Exonpos: Anfang Exon bis 1 rechts neben Ende Exon
-   Daraus folgt: SpliceposAnfang ist gleich Ende Exon von Vorgaenger
-   (ausser dem letzten und ersten)
-    und Ende Splicestelle ist gleich Anfang Exon vom naechsten, da Splicestelleende notiert 1 rechts neben Splciestellenende
-   bsp: 
-   cccccA1cccccE1cccccA2 ... AmcccccEmcccccccc
-   cccccXXcccccA1cccccE1 ... Em-1cccXXcccccccc
-   """
-
-   numberOfExons = (exons.shape)[0] # how many rows ?
-   exonSizes = [-1]*numberOfExons
-
-   assert numberOfExons == 3
-
-   for idx in range(numberOfExons):
-      exonSizes[idx] = exons[idx,1] - exons[idx,0]
-
-   sizeMatchmatrix = 6 # -acgtn
-
-   # SpliceAlign vector describes alignment: 
-   # 1:donorpos, 3:intron 2:acceptorpos, 0:exon, 4: dangling end
-   SpliceAlign = []
-
-   if exons[0,0] > 0:
-      SpliceAlign.extend([4]*(exons[0,0]))
-
-   for idx in range(numberOfExons):
-      exonLength = exonSizes[idx]
-      SpliceAlign.extend([0]*exonLength)
-      
-      if idx < numberOfExons-1:
-         intronLength = exons[idx+1,0] - exons[idx,1]
-         SpliceAlign.extend([1]+[3]*(intronLength-2)+[2])
-
-   if len(dna) > exons[2,1]:
-      SpliceAlign.extend([4]*(len(dna)-exons[2,1]))
-
-   assert len(SpliceAlign) == len(dna), pdb.set_trace()
-
-   # number of matches: just have to look at the underlying est
-   # est = dna(find(SpliceAlign==0)) # exon nucleotides
-   exonPos = [pos for pos,elem in enumerate(SpliceAlign) if elem == 0]
-   est = [elem for pos,elem in enumerate(dna) if pos in exonPos]
-
-   #length_est = sum(exon_sizes) ;
-   totalESTLength = 0
-   for elem in exonSizes:
-      totalESTLength += elem
-
-   assert totalESTLength == len(est) 
-
-   # counts the occurences of a,c,g,t,n in this order
-   numChar = [0]*sizeMatchmatrix
-
-   for elem in est:
-      if elem == 'a':
-         numChar[0] += 1
-      if elem == 'c':
-         numChar[1] += 1
-      if elem == 'g':
-         numChar[2] += 1
-      if elem == 't':
-         numChar[3] += 1
-      if elem == 'n':
-         numChar[4] += 1
-
-   totalNumChar = 0
-   for idx in range(sizeMatchmatrix):
-      totalNumChar += numChar[idx]
-
-   assert totalNumChar == len(est)
-
-   # writing in weight match matrix
-   # matrix is saved columnwise
-   trueWeightMatch = zeros((sizeMatchmatrix*sizeMatchmatrix,1)) # Scorematrix fuer Wahrheit
-   for idx in range(1,sizeMatchmatrix):
-      trueWeightMatch[(sizeMatchmatrix+1)*idx] = numChar[idx-1]
-
-   return SpliceAlign, trueWeightMatch
index be830ce..57d2c94 100644 (file)
@@ -20,7 +20,7 @@ def writeStruct(fid,plif):
       fid.write('%s_len_max=%d\n'%(plif.name,plif.max_len))
       fid.write('%s_len_transform=%s\n'%(plif.name,plif.transform))
 
-def export_param(filename,h,d,a,mmatrix):
+def export_param(filename,h,d,a,mmatrix,qualityPlifs):
 
    # Exports a bz2 file with the trained PALMA. Assumes splice sites and intron length used.
    h.name = 'intron'
@@ -45,4 +45,8 @@ def export_param(filename,h,d,a,mmatrix):
       else:
          fid.write('%f, %f, %f, %f, %f, %f;\n'%(mmatrix[row,0],mmatrix[row,1],mmatrix[row,2],mmatrix[row,3],mmatrix[row,4],mmatrix[row,5]))
 
+   
+   for elem in qualityPlifs:
+      writeStruct(fid, elem);
+
    fid.close()
index 7586613..e21305f 100644 (file)
@@ -34,6 +34,7 @@ from export_param import *
 
 import Configuration
 from Plif import Plf
+from Helpers import *
 
 def getQualityFeatureCounts(qualityPlifs):
    weightQuality = qualityPlifs[0].penalties
@@ -70,8 +71,8 @@ class QPalma:
          Sequences, Acceptors, Donors, Exons, Ests, Noises = paths_load_data_pickle('training',self.genome_info,self.ARGS)
          use_quality_scores = False
       elif Configuration.mode == 'using_quality_scores':
-         #Sequences, Acceptors, Donors, Exons, Ests, Qualities = paths_load_data_solexa('training',self.genome_info,self.ARGS)
-         Sequences, Acceptors, Donors, Exons, Ests, Qualities = generateData(100)
+         Sequences, Acceptors, Donors, Exons, Ests, Qualities = paths_load_data_solexa('training',self.genome_info,self.ARGS)
+         #Sequences, Acceptors, Donors, Exons, Ests, Qualities = generateData(100)
          #Sequences_, Acceptors_, Donors_, Exons_, Ests_, Qualities_ = generateData(100)
 
          #Sequences, Acceptors, Donors, Exons, Ests, Noises = paths_load_data_pickle('training',self.genome_info,self.ARGS)
@@ -208,13 +209,6 @@ class QPalma:
             dna_len = len(dna)
             est_len = len(est)
 
-            #dna ='t'*dna_len
-            #est ='t'*est_len
-            #quality = [math.fabs(elem) for elem in quality]
-            #mmatrix = zeros((6,1))
-            #for pos,elem in enumerate(qualityPlifs):
-            #   elem.penalties = [0.0]*len(elem.penalties)
-         
             ps = h.convert2SWIG()
 
             prb = QPalmaDP.createDoubleArrayFromList(quality)
@@ -313,9 +307,6 @@ class QPalma:
                   if newSpliceAlign[pathNr,alignPosIdx] != trueSpliceAlign[alignPosIdx]:
                      path_loss[pathNr] += 1
 
-               #newWeightMatch[pathNr,:] = zeros((1,6))
-               #decodedQualityFeatures = zeros((Configuration.totalQualSuppPoints,1))
-
                # Gewichte in restliche Zeilen der Matrix speichern
                wp = numpy.vstack([weightIntron, weightDon, weightAcc, newWeightMatch[pathNr,:].T, decodedQualityFeatures])
                allWeights[:,pathNr+1] = wp
@@ -328,9 +319,6 @@ class QPalma:
                AlignmentScores[pathNr+1] = (allWeights[:,pathNr+1].T * features)[0,0]
 
                # Check wether scalar product + loss equals viterbi score
-               #assert math.fabs(AlignmentScores[pathNr] - AlignmentScores[pathNr+1]) < 1e-6,\
-               #'Scalar prod + loss is not equal Viterbi score. Respective values are %f, %f' % \
-               #(AlignmentScores[pathNr],AlignmentScores[pathNr+1])
                print '%f vs %f' % (newDPScores[pathNr,0], AlignmentScores[pathNr+1])
 
                distinct_scores = False
@@ -401,7 +389,15 @@ class QPalma:
       # end of optimization 
       #  
       print 'Training completed'
-      export_param('elegans.param',h,d,a,mmatrix)
+
+      pa = para()
+      pa.h = h
+      pa.d = d
+      pa.a = a
+      pa.mmatrix = mmatrix
+      pa.qualityPlifs = qualityPlifs
+
+      cPickle.dump(pa,open('elegans.param','w+'))
       self.logfh.close()
 
 def fetch_genome_info(ginfo_filename):
diff --git a/python/qpalma_predict.py b/python/qpalma_predict.py
new file mode 100644 (file)
index 0000000..5ff8480
--- /dev/null
@@ -0,0 +1,406 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+###########################################################
+#
+# 
+#
+###########################################################
+
+import sys
+import subprocess
+import scipy.io
+import pdb
+import os.path
+
+from numpy.matlib import mat,zeros,ones,inf
+from numpy.linalg import norm
+
+import QPalmaDP
+from SIQP_CPX import SIQPSolver
+
+from paths_load_data_pickle import *
+from paths_load_data_solexa import *
+
+from generateEvaluationData import *
+
+from computeSpliceWeights import *
+from set_param_palma import *
+from computeSpliceAlignWithQuality import *
+from penalty_lookup_new import *
+from compute_donacc import *
+from TrainingParam import Param
+from export_param import *
+
+import Configuration as Conf
+from Plif import Plf
+from Helpers import *
+
+def getQualityFeatureCounts(qualityPlifs):
+   weightQuality = qualityPlifs[0].penalties
+   for currentPlif in qualityPlifs[1:]:
+      weightQuality = numpy.vstack([weightQuality, currentPlif.penalties])
+
+   return weightQuality 
+
+class QPalma:
+   """
+   A training method for the QPalma project
+   """
+   
+   def __init__(self):
+      self.ARGS = Param()
+
+      self.logfh = open('qpalma.log','w+')
+      gen_file= '%s/genome.config' % self.ARGS.basedir
+
+      ginfo_filename = 'genome_info.pickle'
+      self.genome_info = fetch_genome_info(ginfo_filename)
+
+      self.plog('genome_info.basedir is %s\n'%self.genome_info.basedir)
+
+      #self.ARGS.train_with_splicesitescoreinformation = False
+
+   def plog(self,string):
+      self.logfh.write(string)
+      self.logfh.flush()
+
+   def run(self):
+      # Load the whole dataset 
+      if Conf.mode == 'normal':
+         Sequences, Acceptors, Donors, Exons, Ests, Noises = paths_load_data_pickle('training',self.genome_info,self.ARGS)
+         use_quality_scores = False
+      elif Conf.mode == 'using_quality_scores':
+         #Sequences, Acceptors, Donors, Exons, Ests, Qualities = paths_load_data_solexa('training',self.genome_info,self.ARGS)
+         Sequences, Acceptors, Donors, Exons, Ests, Qualities = generateData(100)
+         #Sequences_, Acceptors_, Donors_, Exons_, Ests_, Qualities_ = generateData(100)
+
+         #Sequences, Acceptors, Donors, Exons, Ests, Noises = paths_load_data_pickle('training',self.genome_info,self.ARGS)
+         #Qualities = []
+         #for i in range(len(Ests)):
+         #   Qualities.append([40]*len(Ests[i]))
+         use_quality_scores = True
+      else:
+         assert(False)
+
+      # number of training instances
+      N = len(Sequences) 
+      self.numExamples = N
+      assert N == len(Acceptors) and N == len(Acceptors) and N == len(Exons)\
+      and N == len(Ests), 'The Seq,Accept,Donor,.. arrays are of different lengths'
+      self.plog('Number of training examples: %d\n'% N)
+      print 'Number of features: %d\n'% Conf.numFeatures
+
+      iteration_steps         = Conf.iter_steps ; #upper bound on iteration steps
+      remove_duplicate_scores = Conf.remove_duplicate_scores 
+      print_matrix            = Conf.print_matrix 
+      anzpath                 = Conf.anzpath 
+
+      # Initialize parameter vector  / param = numpy.matlib.rand(126,1)
+      param_filename = '/fml/ag-raetsch/home/fabio/svn/projects/QPalma/python/elegans.param'
+      param = load_param(param_filename)
+
+      # Set the parameters such as limits penalties for the Plifs
+      [h,d,a,mmatrix,qualityPlifs] = set_param_palma(param,self.ARGS.train_with_intronlengthinformation)
+
+      # delete splicesite-score-information
+      if not self.ARGS.train_with_splicesitescoreinformation:
+         for i in range(len(Acceptors)):
+            if Acceptors[i] > -20:
+               Acceptors[i] = 1
+            if Donors[i] >-20:
+               Donors[i] = 1
+
+      # Initialize solver 
+      if Conf.USE_OPT:
+         self.plog('Initializing problem...\n')
+         solver = SIQPSolver(Conf.numFeatures,self.numExamples,Conf.C,self.logfh)
+
+      # stores the number of alignments done for each example (best path, second-best path etc.)
+      num_path = [anzpath]*N 
+      # stores the gap for each example
+      gap      = [0.0]*N
+
+      #############################################################################################
+      # Training
+      #############################################################################################
+      self.plog('Starting training...\n')
+
+      donSP       = Conf.numDonSuppPoints
+      accSP       = Conf.numAccSuppPoints
+      lengthSP    = Conf.numLengthSuppPoints
+      mmatrixSP   = Conf.sizeMatchmatrix[0]\
+      *Conf.sizeMatchmatrix[1]
+      numq        = Conf.numQualSuppPoints
+      totalQualSP = Conf.totalQualSuppPoints
+
+      currentPhi = zeros((Conf.numFeatures,1))
+      totalQualityPenalties = zeros((totalQualSP,1))
+
+      for exampleIdx in range(self.numExamples):
+
+         dna = Sequences[exampleIdx] 
+         est = Ests[exampleIdx] 
+
+         if Conf.mode == 'normal':
+            quality = [40]*len(est)
+
+         if Conf.mode == 'using_quality_scores':
+            quality = Qualities[exampleIdx]
+
+         exons = Exons[exampleIdx] 
+         # NoiseMatrix = Noises[exampleIdx] 
+         don_supp = Donors[exampleIdx] 
+         acc_supp = Acceptors[exampleIdx] 
+
+         # Berechne die Parameter des wirklichen Alignments (but with untrained d,a,h ...)    
+         trueSpliceAlign, trueWeightMatch, trueWeightQuality = computeSpliceAlignWithQuality(dna, exons)
+         
+         # Calculate the weights
+         trueWeightDon, trueWeightAcc, trueWeightIntron = computeSpliceWeights(d, a, h, trueSpliceAlign, don_supp, acc_supp)
+         trueWeight = numpy.vstack([trueWeightIntron, trueWeightDon, trueWeightAcc, trueWeightMatch, trueWeightQuality])
+
+         currentPhi[0:donSP]                                               = mat(d.penalties[:]).reshape(donSP,1)
+         currentPhi[donSP:donSP+accSP]                                     = mat(a.penalties[:]).reshape(accSP,1)
+         currentPhi[donSP+accSP:donSP+accSP+lengthSP]                      = mat(h.penalties[:]).reshape(lengthSP,1)
+         currentPhi[donSP+accSP+lengthSP:donSP+accSP+lengthSP+mmatrixSP]   = mmatrix[:]
+
+         if Conf.mode == 'using_quality_scores':
+            totalQualityPenalties = param[-totalQualSP:]
+            currentPhi[donSP+accSP+lengthSP+mmatrixSP:]                    = totalQualityPenalties[:]
+
+         # Calculate w'phi(x,y) the total score of the alignment
+         trueAlignmentScore = (trueWeight.T * currentPhi)[0,0]
+
+         # The allWeights vector is supposed to store the weight parameter
+         # of the true alignment as well as the weight parameters of the
+         # num_path[exampleIdx] other alignments
+         allWeights = zeros((Conf.numFeatures,num_path[exampleIdx]+1))
+         allWeights[:,0] = trueWeight[:,0]
+
+         AlignmentScores = [0.0]*(num_path[exampleIdx]+1)
+         AlignmentScores[0] = trueAlignmentScore
+
+         ################## Calculate wrong alignment(s) ######################
+
+         # Compute donor, acceptor with penalty_lookup_new
+         # returns two double lists
+         donor, acceptor = compute_donacc(don_supp, acc_supp, d, a)
+
+         #myalign wants the acceptor site on the g of the ag
+         acceptor = acceptor[1:]
+         acceptor.append(-inf)
+
+         # for now we don't use donor/acceptor scores
+         donor = [-inf] * len(donor)
+         acceptor = [-inf] * len(donor)
+
+         dna = str(dna)
+         est = str(est)
+         dna_len = len(dna)
+         est_len = len(est)
+
+         ps = h.convert2SWIG()
+
+         prb = QPalmaDP.createDoubleArrayFromList(quality)
+         chastity = QPalmaDP.createDoubleArrayFromList([.0]*est_len)
+
+         matchmatrix = QPalmaDP.createDoubleArrayFromList(mmatrix.flatten().tolist()[0])
+         mm_len = Conf.sizeMatchmatrix[0]*Conf.sizeMatchmatrix[1]
+
+         d_len = len(donor)
+         donor = QPalmaDP.createDoubleArrayFromList(donor)
+         a_len = len(acceptor)
+         acceptor = QPalmaDP.createDoubleArrayFromList(acceptor)
+
+         # Create the alignment object representing the interface to the C/C++ code.
+         currentAlignment = QPalmaDP.Alignment(Conf.numQualPlifs,Conf.numQualSuppPoints, use_quality_scores)
+
+         c_qualityPlifs = QPalmaDP.createPenaltyArrayFromList([elem.convert2SWIG() for elem in qualityPlifs])
+
+         # calculates SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest
+         currentAlignment.myalign( num_path[exampleIdx], dna, dna_len,\
+          est, est_len, prb, chastity, ps, matchmatrix, mm_len, donor, d_len,\
+          acceptor, a_len, c_qualityPlifs, remove_duplicate_scores,
+          print_matrix)
+
+         c_SpliceAlign  = QPalmaDP.createIntArrayFromList([0]*(dna_len*num_path[exampleIdx]))
+         c_EstAlign     = QPalmaDP.createIntArrayFromList([0]*(est_len*num_path[exampleIdx]))
+         c_WeightMatch  = QPalmaDP.createIntArrayFromList([0]*(mm_len*num_path[exampleIdx]))
+         c_DPScores     = QPalmaDP.createDoubleArrayFromList([.0]*num_path[exampleIdx])
+
+         c_qualityPlifsFeatures = QPalmaDP.createDoubleArrayFromList([.0]*(Conf.totalQualSuppPoints*num_path[exampleIdx]))
+
+         currentAlignment.getAlignmentResults(c_SpliceAlign, c_EstAlign,\
+         c_WeightMatch, c_DPScores, c_qualityPlifsFeatures)
+
+         newSpliceAlign = zeros((num_path[exampleIdx]*dna_len,1))
+         newEstAlign    = zeros((est_len*num_path[exampleIdx],1))
+         newWeightMatch = zeros((num_path[exampleIdx]*mm_len,1))
+         newDPScores    = zeros((num_path[exampleIdx],1))
+         newQualityPlifsFeatures = zeros((Conf.totalQualSuppPoints*num_path[exampleIdx],1))
+
+         #print 'newSpliceAlign'
+         for i in range(dna_len*num_path[exampleIdx]):
+            newSpliceAlign[i] = c_SpliceAlign[i]
+         #   print '%f' % (spliceAlign[i])
+
+         #print 'newEstAlign'
+         for i in range(est_len*num_path[exampleIdx]):
+            newEstAlign[i] = c_EstAlign[i]
+         #   print '%f' % (spliceAlign[i])
+
+         #print 'weightMatch'
+         for i in range(mm_len*num_path[exampleIdx]):
+            newWeightMatch[i] = c_WeightMatch[i]
+         #   print '%f' % (weightMatch[i])
+
+         #print 'ViterbiScores'
+         for i in range(num_path[exampleIdx]):
+            newDPScores[i] = c_DPScores[i]
+
+         for i in range(Conf.totalQualSuppPoints*num_path[exampleIdx]):
+            newQualityPlifsFeatures[i] = c_qualityPlifsFeatures[i]
+
+         #  equals palma up to here
+
+         #print "Calling destructors"
+         del c_SpliceAlign
+         del c_EstAlign
+         del c_WeightMatch
+         del c_DPScores
+         del c_qualityPlifsFeatures
+         del currentAlignment
+
+         newSpliceAlign = newSpliceAlign.reshape(num_path[exampleIdx],dna_len)
+         newWeightMatch = newWeightMatch.reshape(num_path[exampleIdx],mm_len)
+         # Calculate weights of the respective alignments Note that we are
+         # calculating n-best alignments without hamming loss, so we
+         # have to keep track which of the n-best alignments correspond to
+         # the true one in order not to incorporate a true alignment in the
+         # constraints. To keep track of the true and false alignments we
+         # define an array true_map with a boolean indicating the
+         # equivalence to the true alignment for each decoded alignment.
+         true_map = [0]*(num_path[exampleIdx]+1)
+         true_map[0] = 1
+         path_loss   = [0]*(num_path[exampleIdx])
+
+         for pathNr in range(num_path[exampleIdx]):
+            weightDon, weightAcc, weightIntron = computeSpliceWeights(d, a, h, newSpliceAlign[pathNr,:].flatten().tolist()[0], don_supp, acc_supp)
+
+            decodedQualityFeatures = zeros((Conf.totalQualSuppPoints,1))
+            for qidx in range(Conf.totalQualSuppPoints):
+               decodedQualityFeatures[qidx] = newQualityPlifsFeatures[(pathNr*Conf.totalQualSuppPoints)+qidx]
+
+            path_loss[pathNr] = 0
+            # sum up positionwise loss between alignments
+            for alignPosIdx in range(newSpliceAlign[pathNr,:].shape[1]):
+               if newSpliceAlign[pathNr,alignPosIdx] != trueSpliceAlign[alignPosIdx]:
+                  path_loss[pathNr] += 1
+
+            # Gewichte in restliche Zeilen der Matrix speichern
+            wp = numpy.vstack([weightIntron, weightDon, weightAcc, newWeightMatch[pathNr,:].T, decodedQualityFeatures])
+            allWeights[:,pathNr+1] = wp
+
+            hpen = mat(h.penalties).reshape(len(h.penalties),1)
+            dpen = mat(d.penalties).reshape(len(d.penalties),1)
+            apen = mat(a.penalties).reshape(len(a.penalties),1)
+            features = numpy.vstack([hpen, dpen, apen, mmatrix[:], totalQualityPenalties])
+
+            AlignmentScores[pathNr+1] = (allWeights[:,pathNr+1].T * features)[0,0]
+
+            # Check wether scalar product + loss equals viterbi score
+            print '%f vs %f' % (newDPScores[pathNr,0], AlignmentScores[pathNr+1])
+
+            distinct_scores = False
+            if AlignmentScores[pathNr] != AlignmentScores[pathNr+1]:
+               distinct_scores = True
+            
+            if not math.fabs(newDPScores[pathNr,0] - AlignmentScores[pathNr+1]) + [0,1][distinct_scores and (pathNr>0)] <= 1e-5:
+               pdb.set_trace()
+
+            #  # if the pathNr-best alignment is very close to the true alignment consider it as true
+            if norm( allWeights[:,0] - allWeights[:,pathNr+1] ) < 1e-5:
+               true_map[pathNr+1] = 1
+
+            
+            evaluateExample(exons,newSpliceAlign[pathNr,:])
+
+      print 'Prediction completed'
+      self.logfh.close()
+
+def fetch_genome_info(ginfo_filename):
+   if not os.path.exists(ginfo_filename):
+      cmd = ['']*4
+      cmd[0] = 'addpath /fml/ag-raetsch/home/fabio/svn/tools/utils'
+      cmd[1] = 'addpath /fml/ag-raetsch/home/fabio/svn/tools/genomes'
+      cmd[2] = 'genome_info = init_genome(\'%s\')' % gen_file
+      cmd[3] = 'save genome_info.mat genome_info'  
+      full_cmd = "matlab -nojvm -nodisplay -r \"%s; %s; %s; %s; exit\"" % (cmd[0],cmd[1],cmd[2],cmd[3])
+
+      obj = subprocess.Popen(full_cmd,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
+      out,err = obj.communicate()
+      assert err == '', 'An error occured!\n%s'%err
+
+      ginfo = scipy.io.loadmat('genome_info.mat')
+      cPickle.dump(self.genome_info,open(ginfo_filename,'w+'))
+      return ginfo['genome_info']
+
+   else:
+      return cPickle.load(open(ginfo_filename))
+
+def plifs2param(h,d,a,mmatrix,qualityPlifs):
+   donSP       = Conf.numDonSuppPoints
+   accSP       = Conf.numAccSuppPoints
+   lengthSP    = Conf.numLengthSuppPoints
+   mmatrixSP   = Conf.sizeMatchmatrix[0]\
+   *Conf.sizeMatchmatrix[1]
+
+
+   param = zeros((Conf.numFeatures,1))
+   param[0:donSP]                                               = mat(d.penalties[:]).reshape(donSP,1)
+   param[donSP:donSP+accSP]                                     = mat(a.penalties[:]).reshape(accSP,1)
+   param[donSP+accSP:donSP+accSP+lengthSP]                      = mat(h.penalties[:]).reshape(lengthSP,1)
+   param[donSP+accSP+lengthSP:donSP+accSP+lengthSP+mmatrixSP]   = mmatrix[:]
+
+   for idx in range(len(qualityPlifs)):
+      currentPlif       = qualityPlifs[idx]
+      begin             = lengthSP+donSP+accSP+mmatrixSP+(idx*Conf.numQualSuppPoints)
+      end               = lengthSP+donSP+accSP+mmatrixSP+((idx+1)*Conf.numQualSuppPoints)
+      param[begin:end]  = mat(currentPlif.penalties).reshape(Conf.numQualSuppPoints,1)
+
+   return param
+
+def load_param(filename):
+   param = None 
+   try:
+      para = cPickle.load(open(filename))
+   except:
+      print 'Error: Could not open parameter file!'
+      sys.exit(1)
+   
+   param = plifs2param(para.h,para.d,para.a,para.mmatrix,para.qualityPlifs)
+   return param
+
+def evaluateExample(exons,SpliceAlign):
+   newExons = []
+   oldElem = -1
+   SpliceAlign = SpliceAlign.flatten().tolist()[0]
+   for pos,elem in enumerate(SpliceAlign):
+      if pos == 0:
+         oldElem = -1
+      else:
+         oldElem = SpliceAlign[pos-1]
+
+      if oldElem != 0 and elem == 0:
+         newExons.append(pos)
+
+      if oldElem == 0 and elem != 0:
+         newExons.append(pos-1)
+
+   pdb.set_trace()
+   
+
+if __name__ == '__main__':
+   qpalma = QPalma()
+   qpalma.run()