+ added first converted scripts
authorfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Fri, 30 Nov 2007 13:13:42 +0000 (13:13 +0000)
committerfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Fri, 30 Nov 2007 13:13:42 +0000 (13:13 +0000)
+ SpliceAlign computation works

git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@6888 e1793c9e-67f9-0310-80fc-b846ff1f7b36

14 files changed:
python/.computeSpliceAlign.py.swp [new file with mode: 0644]
python/.compute_SpliceAlign_local.m.swp [new file with mode: 0644]
python/.modsel_paths.m.swp [new file with mode: 0644]
python/.paths_load_data.py.swp [new file with mode: 0644]
python/.qpalma.py.swp [new file with mode: 0644]
python/SIQP.py [new file with mode: 0644]
python/SIQP_CPX.py [new file with mode: 0644]
python/computeSpliceAlign.py [new file with mode: 0644]
python/computeSpliceWeights.py [new file with mode: 0644]
python/numpyUtils.py [new file with mode: 0644]
python/paths_load_data.py [new file with mode: 0644]
python/qpalma.log [new file with mode: 0644]
python/qpalma.py [new file with mode: 0644]
python/set_param_palma.py [new file with mode: 0644]

diff --git a/python/.computeSpliceAlign.py.swp b/python/.computeSpliceAlign.py.swp
new file mode 100644 (file)
index 0000000..5191fdd
Binary files /dev/null and b/python/.computeSpliceAlign.py.swp differ
diff --git a/python/.compute_SpliceAlign_local.m.swp b/python/.compute_SpliceAlign_local.m.swp
new file mode 100644 (file)
index 0000000..5e286ed
Binary files /dev/null and b/python/.compute_SpliceAlign_local.m.swp differ
diff --git a/python/.modsel_paths.m.swp b/python/.modsel_paths.m.swp
new file mode 100644 (file)
index 0000000..f08f95f
Binary files /dev/null and b/python/.modsel_paths.m.swp differ
diff --git a/python/.paths_load_data.py.swp b/python/.paths_load_data.py.swp
new file mode 100644 (file)
index 0000000..d540f74
Binary files /dev/null and b/python/.paths_load_data.py.swp differ
diff --git a/python/.qpalma.py.swp b/python/.qpalma.py.swp
new file mode 100644 (file)
index 0000000..7e345a8
Binary files /dev/null and b/python/.qpalma.py.swp differ
diff --git a/python/SIQP.py b/python/SIQP.py
new file mode 100644 (file)
index 0000000..d8ada7e
--- /dev/null
@@ -0,0 +1,248 @@
+#!/usr/bin/env python
+# -*- coding:latin-1 -*-
+
+import sys
+import os
+import os.path
+import logging
+import math
+import pdb
+
+import cvxopt.base as cb
+
+import logging
+logging.basicConfig(level=logging.DEBUG,format='%(levelname)s %(message)s')
+
+def asymCoords(t):
+   size_upstream = t[0]
+   size_downstream = t[1]
+   assert size_upstream >= 1 and size_upstream <= 39
+   assert size_downstream >= 1 and size_downstream <= 39
+   
+   if size_downstream > size_upstream:
+      asym_small = size_upstream
+      asym_big   = size_downstream
+   else:
+      asym_small = size_downstream
+      asym_big   = size_upstream
+
+   asym_offset = 0
+   for r in range(asym_small):
+      asym_offset += (40-r)-r;
+
+   return (asym_offset+asym_big)-1
+
+def calcTuples(i,j):
+   return filter(lambda t: t[0]<=t[1] and t[0]>0 and t[0]<40 and t[1]>0 and t[1]<40,[(i+1,j),(i-1,j),(i,j+1),(i,j-1)])
+
+class SIQP:
+   """
+   This class is a wrapper for the cvxopt qp solver.
+   It has the purpose to support an iterative addition of
+   constraints to the original problem.
+
+   We want to solve a problem of the form
+
+   min  1/2 * C * x' * P * x + q' * x
+    x
+
+    s.t. 
+      < w,f+ > - < w,f- > >= 1 - xi_i    for all i, for all f-
+
+   where x is a vector storing information on the parameters w_i and the slacks xi_i
+   so x = [w;xi] 
+
+   """
+
+   def __init__(self,fSize,numExamples,c):
+      assert fSize > 0, 'You have to have at least one feature!'
+      assert numExamples > 0, 'You have to have at least one example!'
+      self.numFeatures = fSize
+      self.numExamples = numExamples
+      self.C = c
+      self.EPSILON = 10e-15
+
+      logging.debug("Starting SIQP solver with %d examples. A feature space dim. of %d.", numExamples,fSize)
+      logging.debug("Regularization parameters are C=%f."%(self.C))
+
+      self.old_w = None
+
+      self.dimP = self.numFeatures + self.numExamples
+      self.createRegularizer()
+
+   def parseParameterFile(self):
+      paramFile = 'Parameters.h'
+      paramFileLoc = os.path.join('/fml/ag-raetsch/home/fabio/projects/rfp_current',paramFile)
+
+      self.featMap = { 
+         'BASE_PAIR':0,
+         'HELIX_CLOSING':1,
+         'STACKING_PAIR':2,
+         'STACKING_SCORE':3,
+         'TERM_MIS':4, 
+         'HAIRPIN':5, 
+         'BULGE':6, 
+         'INTERNAL_LENGTH':7,
+         'INTERNAL_FULL':8,
+         'INTERNAL_ASYM':9,
+         'SINGLE_LEFT':10, 
+         'SINGLE_RIGHT':11 }
+
+      # ('STACKING_PAIR_AAAA','STACKING_PAIR_UUUU')
+      self.start_stops = [
+      ('BASE_PAIR_AA','BASE_PAIR_UU'), 
+      ('HELIX_CLOSING_AA','HELIX_CLOSING_UU'), 
+      ('STACKING_PAIR_AAAA','STACKING_PAIR_UUUU'),
+      ('STACKING_1_SCORE','STACKING_5_SCORE'), 
+      ('TERMINAL_MISMATCH_AAAA','TERMINAL_MISMATCH_UUUU'),
+      ('HAIRPIN_LOOP_1','HAIRPIN_LOOP_30'),
+      ('BULGE_LOOP_1','BULGE_LOOP_30'),('INTERIOR_LOOP_1','INTERIOR_LOOP_30'),
+      ('INTERNAL_LENGTH_1','INTERNAL_LENGTH_30'),
+      ('INTERNAL_FULL_1_1','INTERNAL_FULL_15_15'),
+      ('INTERNAL_ASYMMETRY_1','INTERNAL_ASYMMETRY_30'),
+      ('SINGLE_BASE_PAIR_STACKING_LEFT_AAA','SINGLE_BASE_PAIR_STACKING_LEFT_UUU'),
+      ('SINGLE_BASE_PAIR_STACKING_RIGHT_AAA','SINGLE_BASE_PAIR_STACKING_RIGHT_UUU') ]
+
+      self.param_pos = [(0,0)]*len(self.featMap)
+   
+      for line in open(paramFileLoc):
+         if line.startswith('#define') and not line.startswith('#define _'):
+            line = line.replace('#define','').strip()
+            name,value = line.split(' ')
+            value = int(value)
+
+            for key,id in self.featMap.items():
+               if name == self.start_stops[id][0]:
+                  self.param_pos[id] = (value,0)
+               if name == self.start_stops[id][1]:
+                  a,b = self.param_pos[id] 
+                  self.param_pos[id] = (a,value)
+
+      self.param_ranges = [(0,0)]*len(self.featMap)
+      self.param_coeff = [1.0]*len(self.featMap)
+
+      self.param_coeff = [
+            0.33, 2.5, 1.0, 1.0,
+            1.5, 0.5, 5.0, 3.0,
+            2.5, 5.0, 2.5, 2.0]
+
+      for key,id in self.featMap.items():
+         self.param_ranges[id] = range(self.param_pos[id][0],self.param_pos[id][1])
+
+   def regularizeTogether(self,pos1,pos2):
+      self.P[pos1,pos2] = -1.0 
+      self.P[pos2,pos1] = -1.0
+      #self.P[pos1,pos1] += 1.0
+
+   def createRNARegularizer(self):
+      self.parseParameterFile()
+
+      self.loopRanges = [ self.param_ranges[self.featMap['STACKING_SCORE']], self.param_ranges[self.featMap['HAIRPIN']],
+      self.param_ranges[self.featMap['BULGE']],self.param_ranges[self.featMap['INTERNAL_LENGTH']]]
+
+      self.P = cb.matrix(0.0,(self.dimP,self.dimP))
+
+      # first create unit matrix (only upper block)
+      for i in range(self.numFeatures):
+         self.P[i,i] = 1.0
+
+      # now add terms for loop smoothness constraints
+      #for currentRange in self.loopRanges:
+      #   for i in currentRange:
+      #      self.P[i,i+1] = -1.0 
+      #      self.P[i+1,i] = -1.0
+      #      self.P[i,i] += 1.0
+
+      b0,b1,b2,b3 = 1,4,16,64
+      posList = []
+      
+      for i in range(4):
+         for j in range(4):
+            for k in range(4):
+               for l in range(4):
+                  
+                  index1 = i*b3 + j*b2 + k*b1 + l*b0
+                  index2 = l*b3 + k*b2 + j*b1 + i*b0
+
+                  if index1 > index2:
+                     continue;
+
+                  posList.append((i,j,k,l))
+
+      stacking_start = (self.param_pos[self.featMap['STACKING_PAIR']])[0]
+
+      for pos1,elem1 in enumerate(posList):
+         for pos2,elem2 in enumerate(posList):
+            if elem1 == elem2:
+               continue
+
+            i,j,k,l = elem1
+            ip,jp,kp,lp = elem2
+
+            if (k,l,i,j) == (ip,jp,kp,lp):
+               print stacking_start+pos1,stacking_start+pos2
+               #self.regularizeTogether(stacking_start+pos1,stacking_start+pos2)
+
+      # Scale regularization terms
+      for key,id in self.featMap.items():
+         currentRange = self.param_ranges[id]
+         currentCoeff = self.param_coeff[id]
+
+         self.P[currentRange,currentRange] *= currentCoeff
+
+      for i in range(self.numFeatures,self.dimP):
+         assert self.P[i,i] == 0.0
+
+   def createUnitRegularizer(self):
+      # set regularizer to zero
+      self.P = cb.matrix(0.0,(self.dimP,self.dimP))
+      for i in range(self.numFeatures):
+         self.P[i,i] = 1.0
+      # end of zeroing regularizer
+
+   def createRegularizer(self):
+      self.createUnitRegularizer()
+      #self.createRNARegularizer()
+
+      q_array = [0.0]*self.numFeatures + [1.0]*self.numExamples
+      self.q = cb.matrix(q_array,(self.numFeatures+self.numExamples,1),'d')
+      self.q = self.C * self.q
+
+      self.G = cb.matrix(0.0,(self.numExamples,self.numFeatures+self.numExamples))
+      for i in range(self.numExamples):
+         self.G[i,self.numFeatures+i] = -1
+
+      self.h = cb.matrix(0,(self.numExamples,1),'d')
+
+if __name__ == '__main__':
+   sq = SIQP(3,2,100.0)
+
+   """
+   function [C_qp_penalties,C_qp_smoothness,C_lp_smoothness,C_qp_smallness,column_eps,Q,f,LB,UB] = paths_create_qp(N,C)
+   % function [C_qp_penalties,C_qp_smoothness,C_lp_smoothness,C_qp_smallness,column_eps,Q,f,LB,UB] = paths_create_qp(N,C)
+
+   %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+   % qp_solve: min ( <res, f'> + 1/2 res' Q res)
+   %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+   C_qp_penalties  = C ;    %was 0.01
+   C_qp_smoothness = C ;    %was 0.01
+   C_lp_smoothness = 0.00 ; %linear problem parameters
+   C_qp_smallness  = 1e-6 ; %was 1e-6 
+   column_eps      = 1e-3 ;
+
+   Q=zeros(2*126+N) ; % 1/2 * res' * Q * res
+   Q(1:90,1:90)           = C_qp_smallness*diag(ones(1,90)) ;
+   Q(91:126,91:126)       = C_qp_penalties*diag(ones(1,36)) ;
+   Q(127:2*126,127:2*126) = C_qp_smoothness*diag(ones(1,126)) ;
+   f = [zeros(1,126) C_lp_smoothness*ones(1,126) ones(1,N)/N] ; % <res, f'>
+
+
+
+   %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+   % qp_solve: LB <= res <= UB
+   %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+   INF = 1e20 ;
+   LB = [-INF*ones(1,126) zeros(1,126) zeros(1,N)] ; % lower bound for res
+   UB = [ INF*ones(1,2*126) INF*ones(1,N)] ; % upper bound for res
+
+   """
diff --git a/python/SIQP_CPX.py b/python/SIQP_CPX.py
new file mode 100644 (file)
index 0000000..6fe225a
--- /dev/null
@@ -0,0 +1,333 @@
+#!/usr/bin/env python
+# -*- coding:latin-1 -*-
+
+import sys
+import os
+import os.path
+import logging
+import math
+
+import cvxopt.base as cb
+
+import logging
+logging.basicConfig(level=logging.DEBUG,format='%(levelname)s %(message)s')
+
+import CPX
+import numpy as ny
+import numpy.matlib as nyml
+from cplex_const import *
+
+from SIQP import SIQP
+
+OPTIMAL = [CPX_STAT_OPTIMAL, CPXMIP_OPTIMAL, CPXMIP_OPTIMAL_TOL]
+UNBOUNDED = [CPX_STAT_UNBOUNDED, CPXMIP_UNBOUNDED]
+INFEASIBLE = [CPX_STAT_INFEASIBLE, CPXMIP_INFEASIBLE, CPXMIP_INForUNBD]
+
+class FloatMatrix:
+   
+   TYPE = nyml.float64
+
+   def __init__(self,list):
+      self.data = nyml.mat(list, dtype=self.TYPE).T
+
+class IntMatrix:
+   
+   TYPE = nyml.int32
+
+   def __init__(self,list):
+      self.data = nyml.mat(list, dtype=self.TYPE).T
+
+class CharMatrix:
+   
+   TYPE = '|S1'
+
+   def __init__(self,list):
+      self.data = nyml.mat(list, dtype=self.TYPE).T
+
+class SIQPSolver(SIQP):
+   """
+   This algorithm solves the problem 
+
+   min 1/2 x'Px + q'x
+   s.t. 
+         Gx >= h
+   """
+
+   # Define types and constants
+   Inf   = CPX_INFBOUND
+
+   def __init__(self,fSize,numExamples,c,proto):
+      SIQP.__init__(self,fSize,numExamples,c)
+      self.numFeatures = fSize
+      self.numExamples = numExamples
+      self.numVariables = self.numFeatures + self.numExamples
+      self.old_w = None
+      self.old_objective_value = -(sys.maxint-1)
+
+      self.addedConstraints = 0
+
+      self.protocol = proto
+
+      self.objsen = CPX_MIN
+
+      # open CPLEX environment, set some parameters
+      self.env = CPX.openCPLEX()
+
+      CPX.setintparam(self.env, CPX_PARAM_SCRIND, CPX_ON) # print >> self.protocol, info to screen
+      CPX.setintparam(self.env, CPX_PARAM_DATACHECK, CPX_ON)
+
+      # create CPLEX problem, add objective and constraints to it
+      self.lp = CPX.createprob(self.env, 'test1')
+
+      self.obj = FloatMatrix([0.0]*self.numFeatures + [1.0]*self.numExamples).data
+      cFactor = 1.0*self.C/self.numExamples
+      self.obj =  cFactor * self.obj
+      assert self.obj.shape == (self.numVariables,1)
+      
+      self.ub = FloatMatrix([self.Inf] * self.numVariables).data
+      self.lb = FloatMatrix([-self.Inf] * self.numFeatures + [0.0]*self.numExamples).data
+
+      self.matbeg = IntMatrix([1]*self.numVariables).data
+      self.matbeg[0] = 0
+
+      self.matcnt = IntMatrix([0]*self.numVariables).data
+      self.matcnt[0] = 1
+
+      self.matind = IntMatrix([0]).data
+      self.matval = FloatMatrix([0.0]).data
+
+      self.rhs = FloatMatrix([0.0]*self.numVariables).data
+      self.sense = CharMatrix(['G']*self.numVariables).data
+
+      self.numConstraints = 1
+
+      CPX.copylp(self.env, self.lp, self.numVariables, self.numConstraints, self.objsen, 
+               self.obj, self.rhs, self.sense,
+               self.matbeg, self.matcnt, self.matind, self.matval, 
+               self.lb, self.ub) 
+
+      self.qmatbeg, self.qmatcnt, self.qmatind, self.qmatval = self.cpx_matrix(self.P)
+      CPX.copyquad(self.env, self.lp, self.qmatbeg, self.qmatcnt, self.qmatind, self.qmatval)
+
+   def addConstraint(self, energy_deltas, loss, example_idx, useMarginRescaling):
+      """
+      This method adds one contraint to the optimization problem.
+      """
+      self.lastDelta = energy_deltas
+      self.lastLoss = loss
+      self.lastIndex = example_idx
+
+      assert loss >= 0.0, 'Loss has to be non-negative!'
+      assert -1 < example_idx and example_idx < self.numExamples
+
+      #if loss < self.EPSILON:
+      #    return False
+
+      #if self.old_w != None:
+      #   scalar_prod = self.old_w[0:self.numFeatures,0].trans()  * energy_deltas
+      #   old_slack = self.old_w[self.numFeatures+example_idx,0]
+      #   if useMarginRescaling:
+      #       if scalar_prod[0] + old_slack > loss: # leave some room for inaccuracies
+      #           print 'Current example already fulfills constraint.'
+      #           print >> self.protocol,  'Current example already fulfills constraint.'
+      #           return False
+
+      # printing out respective loss resp. scalar prod contributions
+      if self.old_w != None:
+         print >> self.protocol, 'Loss contrib is %d'%loss
+         print >> self.protocol, 'Scalar prod. contrib is %f'%((self.old_w[0:self.numFeatures].T*energy_deltas)[0])
+
+      numNewRows = 1
+      numNewCols = 0
+   
+      row_rhs = FloatMatrix([loss]).data
+      row_sense = CharMatrix(['G']).data
+
+      row_matbeg = IntMatrix([0]).data
+
+      row_entries = [0.0]*self.numVariables
+      indices = [0]*self.numVariables
+
+      currentVal = 0.0
+      numNonZeroElems = 0
+      for idx in range(self.numFeatures):
+         currentVal = energy_deltas[idx]
+         if math.fabs(currentVal) > self.EPSILON:
+            row_entries[numNonZeroElems] = currentVal
+            indices[numNonZeroElems] = idx
+            numNonZeroElems += 1
+
+      row_matind = IntMatrix([0]*(numNonZeroElems+1)).data
+      row_matval = FloatMatrix([0.0]*(numNonZeroElems+1)).data
+
+      for pos in range(numNonZeroElems):
+         row_matind[pos] = indices[pos]
+         row_matval[pos] = row_entries[pos]
+
+      # coefficient 1.0 of xi
+      row_matind[numNonZeroElems] = self.numFeatures+example_idx
+      row_matval[numNonZeroElems] = 1.0
+
+      status = CPX.addrows (self.env, self.lp, numNewCols, numNewRows, numNonZeroElems+1, row_rhs,
+                                  row_sense, row_matbeg, row_matind, row_matval)
+      
+      assert status == 0, 'Error ocurred while adding constraint.'
+
+      #CPX.writeprob(self.env,self.lp,"problem_%d.SAV"%self.addedConstraints)
+      #CPX.writeprob(self.env,self.lp,"problem_%d.LP"%self.addedConstraints)
+      self.addedConstraints += 1
+
+      return True
+               
+   def solve(self):
+      # solve the problem
+      CPX.qpopt(self.env,self.lp)
+
+      # get the solution
+      lpstat = CPX.getstat(self.env,self.lp)
+      if lpstat in OPTIMAL or lpstat == 6:
+         x, objval = CPX.getx(self.env,self.lp), CPX.getobjval(self.env,self.lp)
+         print >> self.protocol, 'SIQP_CPX: Objective is: %f'%objval
+
+         # Check that objective value is monotonically increasing
+         if math.fabs(objval - self.old_objective_value) >= 10e-5:
+            assert objval >= self.old_objective_value
+
+         self.old_objective_value = objval
+
+         new_w = cb.matrix([0.0]*self.numVariables,(self.numVariables,1))
+         for i in range(self.numVariables):
+            new_w[i,0] = x[i]
+
+         self.old_w = new_w
+
+         # Check for non-negative slacks    
+         sumOfSlacks = 0.0
+         print >> self.protocol, 'Slacks are:'
+         for i in range(self.numExamples):
+            print >> self.protocol, '%f ' % new_w[self.numFeatures+i,0]
+            assert new_w[self.numFeatures+i,0] >= 0.0
+            sumOfSlacks += new_w[self.numFeatures+i,0]
+
+         scalarProdDiff = (new_w[0:self.numFeatures,0].T * self.lastDelta)[0]
+         print >> self.protocol, 'Constraint: %f >= %d - %f'%(scalarProdDiff,self.lastLoss,new_w[self.numFeatures+self.lastIndex])
+
+         # evalute parts of the objective
+         regularizationPart = (0.5 *new_w.T*self.P*new_w)[0]
+         lossPart = 1.0*self.C/self.numExamples * sumOfSlacks
+         assert lossPart >= 0.0, 'Error we have a negative loss contribution'
+         print >> self.protocol, 'Parts of objective: %e %e'%(regularizationPart,lossPart)
+
+         return objval,new_w[0:self.numFeatures,0],new_w[self.numFeatures:,0]
+
+      elif lpstat in UNBOUNDED: 
+         print >> self.protocol, "Solution unbounded"
+         assert False
+      elif lpstat in INFEASIBLE: 
+         print >> self.protocol, "Solution infeasible"
+         assert False
+      else: 
+         print >> self.protocol, "Solution code: ", lpstat
+         assert False
+
+   def cpx_matrix(self,M):
+      """
+      This method creates a sparse CPLEX representation of a given
+      cvxopt Matrix M.
+      """
+
+      nRows,nCols = M.size
+      numEntries = nRows*nCols
+
+      matbeg = IntMatrix([0]*nCols).data
+      matcnt = IntMatrix([0]*nCols).data
+
+      nonZeros = 0
+      elemCounter = 0
+      entry = 0.0
+
+      valVec = [0.0]*numEntries
+      indVec = [0.0]*numEntries
+
+      for currentCol in range(nCols):
+         nonZeros = 0
+         matbeg[currentCol] = elemCounter
+
+         for currentRow in range(nRows):
+            entry = M[currentRow,currentCol]
+            if math.fabs(entry) > self.EPSILON:
+               nonZeros += 1
+               indVec[elemCounter] = currentRow
+               valVec[elemCounter] = entry
+               elemCounter += 1
+
+         matcnt[currentCol] = nonZeros
+
+      totalNonZeros = elemCounter
+
+      matind = IntMatrix([0]*totalNonZeros).data
+      matval = FloatMatrix([0.0]*totalNonZeros).data
+
+      for idx in range(totalNonZeros):
+         matind[idx] = indVec[idx];
+         matval[idx] = valVec[idx];
+    
+      print >> self.protocol, 'nRows,nCols: %d,%d'%(nRows,nCols)
+      print >> self.protocol, 'totalNonZeros ' + str(totalNonZeros)
+
+      return matbeg,matcnt,matind,matval
+
+   def cpx_matrix_id(self):
+      """
+      This method creates a sparse CPLEX representation of a given
+      cvxopt Matrix M.
+      """
+
+      nRows,nCols = self.numVariables,self.numVariables
+      matbeg = IntMatrix([0]*nCols).data
+      matcnt = IntMatrix([0]*nCols).data
+
+      matind = IntMatrix([0]*nCols).data
+      matval = FloatMatrix([0.0]*nCols).data
+
+      for i in range(nCols):
+         matbeg[i] = i
+         matcnt[i] = 1
+         matind[i] = i
+         matval[i] = 1.0
+
+      return matbeg,matcnt,matind,matval
+
+   def delete(self):
+      # free the problem
+      CPX.freeprob(self.env,self.lp)
+      # close CPLEX environment
+      CPX.closeCPLEX(self.env)
+
+def test():
+   fh = open('siqp_cpx.log','w+')
+
+   numFeatures = 3
+   numExamples = 2
+   s = SIQPSolver(numFeatures,numExamples,10.0,fh)
+
+   d1 = cb.matrix([1,0,2],(numFeatures,1))
+   d2 = cb.matrix([1,1,0],(numFeatures,1))
+
+   s.addConstraint(d1,200.0,0,True)
+   s.addConstraint(d2,105.0,1,True)
+
+   w,slacks = s.solve()
+   print w
+   print slacks
+
+   print w.T * d1 + slacks[0]
+   print w.T * d2 + slacks[1]
+
+
+   s.delete()
+   del s
+   fh.close()
+
+if __name__ == '__main__':
+   test()
diff --git a/python/computeSpliceAlign.py b/python/computeSpliceAlign.py
new file mode 100644 (file)
index 0000000..04cbbe1
--- /dev/null
@@ -0,0 +1,98 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from numpy.matlib import zeros
+import pdb
+
+
+def  computeSpliceAlign(dna, exons):
+   """
+   Exonpos: Anfang Exon bis 1 rechts neben Ende Exon
+   Daraus folgt: SpliceposAnfang ist gleich Ende Exon von Vorgaenger
+   (ausser dem letzten und ersten)
+    und Ende Splicestelle ist gleich Anfang Exon vom naechsten, da Splicestelleende notiert 1 rechts neben Splciestellenende
+   bsp: 
+   cccccA1cccccE1cccccA2 ... AmcccccEmcccccccc
+   cccccXXcccccA1cccccE1 ... Em-1cccXXcccccccc
+   """
+
+   numberOfExons = (exons.shape)[0] # how many rows ?
+   exonSizes = [-1]*numberOfExons
+
+
+   for idx in range(numberOfExons):
+      exonSizes[idx] = exons[idx,1] - exons[idx,0]
+
+   sizeMatchmatrix = 6 # -acgtn
+
+   # SpliceAlign vector describes alignment: 
+   # 1:donorpos, 3:intron 2:acceptorpos, 0:exon, 4: dangling end
+   SpliceAlign = []
+
+   if exons[0,0] > 1:
+      SpliceAlign.extend([4]*(exons[0,0]))
+
+   #for i=1:nr_exons
+   #  exon_length = exon_sizes(i); %exons(i,2) is begin of intron
+   #  SpliceAlign = [SpliceAlign, zeros(1,exon_length)] ;
+   #  if i~= nr_exons,
+   #    intron_length = exons(i+1,1) - exons(i,2) ;
+   #    SpliceAlign = [SpliceAlign, 1, ones(1,intron_length-2)*3, 2] ;
+   #  end
+   #end 
+
+   for idx in range(numberOfExons):
+      exonLength = exonSizes[idx]
+      SpliceAlign.extend([0]*exonLength)
+      
+      if idx < numberOfExons-1:
+         intronLength = exons[idx+1,0] - exons[idx,1]
+         SpliceAlign.extend([1])
+         SpliceAlign.extend([3]*((intronLength-2)))
+         SpliceAlign.extend([2])
+         
+   if len(dna) > exons[2,1]:
+      SpliceAlign.extend([4]*(len(dna)+1-exons[2,1]))
+
+   assert len(SpliceAlign) == len(dna), pdb.set_trace()
+
+   # number of matches: just have to look at the underlying est
+   # est = dna(find(SpliceAlign==0)) # exon nucleotides
+   exonPos = [pos for pos,elem in enumerate(SpliceAlign) if elem == 0]
+   est = [elem for pos,elem in enumerate(dna) if pos in exonPos]
+
+   #length_est = sum(exon_sizes) ;
+   totalESTLength = 0
+   for elem in exonSizes:
+      totalESTLength += elem
+
+   assert totalESTLength == len(est) 
+
+   # counts the occurences of a,c,g,t,n in this order
+   numChar = [0]*sizeMatchmatrix
+
+   for elem in est:
+      if elem == 'a':
+         numChar[0] += 1
+      if elem == 'c':
+         numChar[1] += 1
+      if elem == 'g':
+         numChar[2] += 1
+      if elem == 't':
+         numChar[3] += 1
+      if elem == 'n':
+         numChar[4] += 1
+
+   totalNumChar = 0
+   for idx in range(sizeMatchmatrix):
+      totalNumChar += numChar[idx]
+
+   assert totalNumChar == len(est)
+
+   # writing in weight match matrix
+   # matrix is saved columnwise
+   trueWeightMatch = zeros((sizeMatchmatrix*sizeMatchmatrix,1)) # Scorematrix fuer Wahrheit
+   for idx in range(1,sizeMatchmatrix):
+      trueWeightMatch[(sizeMatchmatrix+1)*idx] = numChar[idx-1]
+
+   return SpliceAlign, trueWeightMatch
diff --git a/python/computeSpliceWeights.py b/python/computeSpliceWeights.py
new file mode 100644 (file)
index 0000000..e4c421c
--- /dev/null
@@ -0,0 +1,89 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from numpy.matlib import zeros
+
+def calculateWeights(plf, scores):
+   """
+   3 * 30 supporting points: x_1 ... x_30
+   => 3 * 30 parameters (1 .. 90): y_1 ... y_30
+   piecewise linear function:
+
+   take score from SVM vektor (don_supp: case 1, acc_supp: case 2) and compute length of intron: case 3
+   these are our values x
+
+          | y_1                                          if x <= x_1
+          |
+          |        x_i+1 - x              x - x_i
+   f(x) = | y_i * -----------  +  y_i+1 * -----------    if x_i <= x <= x_i+1
+          |       x_i+1 - x_i             x_i+1 - x_i
+          |
+          | y_30                                         if x_n <= x 
+
+    y_i and y_i+1 parameters, so the fractions are saved in the weight vectors!
+   """
+   currentWeight = zeros((1,30))
+   
+   for k in range(len(scores)):
+      value = scores[k]
+      Lower = sum(d.limits <= value)
+      Upper = Lower +1 ; # x-werte bleiben fest
+
+      if Lower == 0:
+         currentWeight[1] = currentWeight[1] + 1;
+      elif:
+         Lower == length[plf.limits]
+         currentWeight[30] = currentWeight[30] + 1;
+      else:
+         weightup  = (value - plf.limits(Lower)) / (plf.limits(Upper) - plf.limits(Lower)) ;
+         weightlow = (plf.limits(Upper) - value) / (plf.limits(Upper) - plf.limits(Lower)) ;
+         currentWeight(Upper) = currentWeight(Upper) + weightup;
+         currentWeight(Lower) = currentWeight(Lower) +  weightlow;
+
+   return currentWeight
+
+
+def computeSpliceWeights(d, a, h, SpliceAlign, don_supp, acc_supp):
+   assert(isempty(d.transform))
+   assert(isempty(a.transform))
+   assert(isempty(h.transform))
+
+   ####################################################################################
+   # 1. Donor: In don_supp stehen Werte der SVM., in SpliceAlign die Einsen
+   ####################################################################################
+
+   # Lege Gewichtsvektor fuer 30 supporting points an:
+   weightDon = zeros((1,30))
+
+   # Picke die Positionen raus, an denen eine Donorstelle ist
+   DonorScores = don_supp(find(SpliceAlign==1)) ;
+   assert(all(DonorScores~=-Inf)) ;
+
+   weightDon = calculateWeights(d,DonorScores)
+
+   ####################################################################################
+   # 2. Acceptor: In acc_supp stehen Werte der SVM., in SpliceAlign die Einsen
+   ####################################################################################
+
+   AcceptorScores = acc_supp(find(SpliceAlign == 2)+1) ; 
+   assert(all(AcceptorScores~=-Inf)) ;
+
+   #Den Vektor Acceptorstellen durchgehen und die Gewichtsvektoren belasten:
+   weightAcc = calculateWeights(a,AcceptorScores)
+
+
+   ####################################################################################
+   # 3. Intron length: SpliceAlign: Gaps zaehlen und auf Gapgewichte addieren
+   ####################################################################################
+
+   intron_starts =  find(SpliceAlign==1) ;
+   intron_ends   =  find(SpliceAlign==2) ;
+
+   %assert: introns do not overlap
+   assert(length(intron_starts) == length(intron_ends)) ;
+   assert(all(intron_starts < intron_ends)) ; 
+   assert(all(intron_ends(1:end-1) < intron_starts(2:end))) ; 
+
+   weightIntron = calculateWeights(h,AcceptorScores)
+
+   return weightDon, weightAcc, weightIntron
diff --git a/python/numpyUtils.py b/python/numpyUtils.py
new file mode 100644 (file)
index 0000000..14ece77
--- /dev/null
@@ -0,0 +1,8 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import numpy as ny
+import numpy.matlib as nyml
+
+
+def 
diff --git a/python/paths_load_data.py b/python/paths_load_data.py
new file mode 100644 (file)
index 0000000..bd31cce
--- /dev/null
@@ -0,0 +1,119 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import io_pickle
+import scipy.io
+import pdb
+
+def paths_load_data(expt,genome_info,PAR):
+   """
+      
+   """
+
+   # function [Sequences, Acceptors, Donors, Exons, Ests, Noises] = paths_load_data(expt,genome_info,PAR)
+   # Load the relevant file and return the alignment data
+
+   # expt can be 'training','validation' or 'test'
+
+   assert expt in ['training','validation','test']
+
+   tmp_dir = '/fml/ag-raetsch/home/fabio/tmp'
+
+   Noises = [];
+
+   if expt == 'training':
+      if PAR.microexon:
+         if PAR.LOCAL_ALIGN: # local version
+
+            train_data = '%s/microexon_train_data_cut_local.mat' % genome_info.basedir
+            train_data = '%s/microexon_train_data.mat' % genome_info.basedir
+            #train_data_pickle = '%s/microexon_train_data_cut_local.pickle'% tmp_dir
+            #io_pickle.convert_v6(train_data,train_data_pickle)
+            #train_data = io_pickle.load(train_data_pickle)
+            data = scipy.io.loadmat(train_data)
+
+         else: # global version
+
+            train_data = '%s/microexon_train_data_cut_ip=%1.3f_dp=%1.3f_mp=%1.3f.mat' %\
+               (genome_info.basedir, PAR.insertion_prob, PAR.deletion_prob, PAR.mutation_prob)
+
+            train_data = '%s/microexon_train_data.mat' % genome_info.basedir
+            #train_data_pickle = '%s/microexon_train_data_cut_ip=%1.3f_dp=%1.3f_mp=%1.3f.pickle' %\
+            #   (tmp_dir, PAR.insertion_prob, PAR.deletion_prob, PAR.mutation_prob)
+
+            #io_pickle.convert_v6(train_data,train_data_pickle)
+            #train_data = io_pickle.load(train_data_pickle)
+            data = scipy.io.loadmat(train_data)
+            Noises = data['TrainNoise'] # substitution matrix
+
+      else:
+         train_data = '%s/exons_train_local.mat' % genome_info.basedir
+         #train_data_pickle = '%s/exons_train_local.pickle'% tmp_dir
+         #io_pickle.convert_v6(train_data,train_data_pickle)
+         #microexon_train_data = io_pickle.load(train_data_pickle)
+         data = scipy.io.loadmat(train_data)
+
+      print 'train_data is %s' % train_data
+
+      Sequences = data['Train']       # dna sequences
+      Acceptors = data['TrainAcc']    # acceptor scores
+      Donors    = data['TrainDon']    # donor scores
+      Exons     = data['TrainExon']   # exon boundaries
+      Ests      = data['TrainEsts']   # est sequences
+     
+   #elif expt == 'validation':
+   #  print('Loading validation data\n') ;
+   #  if PAR.microexon,
+   #    if PAR.LOCAL_ALIGN
+   #      %local version
+   #      load(sprintf('%s/microexon_val_data_cut_local_ip=%1.3f_dp=%1.3f_mp=%1.3f.mat', ...
+   #         genome_info.basedir, PAR.insertion_prob, PAR.deletion_prob, PAR.mutation_prob),  ...
+   #      'ValEsts', 'ValExon', 'Val', 'ValAcc', 'ValDon') ;
+   #    else
+   #      %global version
+   #      load(sprintf('%s/microexon_val_data_cut_ip=%1.3f_dp=%1.3f_mp=%1.3f.mat', ...
+   #         genome_info.basedir, PAR.insertion_prob, PAR.deletion_prob, PAR.mutation_prob),  ...
+   #      'ValEsts', 'ValExon', 'Val', 'ValAcc', 'ValDon') ;
+   #    end
+   #  else
+   #    load(sprintf('%s/exons_val_ip=%1.3f_dp=%1.3f_mp=%1.3f.mat', ...
+   #       genome_info.basedir, PAR.insertion_prob, PAR.deletion_prob, PAR.mutation_prob),  ...
+   #    'ValEsts', 'ValExon', 'Val', 'ValAcc', 'ValDon') ;
+   #  end
+   #  
+   #  Sequences = Val ;      % dna sequences
+   #  Acceptors = ValAcc ;   % acceptor scores
+   #  Donors    = ValDon ;   % donor scores
+   #  Exons     = ValExon ;  % exon boundaries
+   #  Ests      = ValEsts ;  % est sequences
+
+
+
+   #elif expt == 'test':
+   #  fprintf('Loading test data\n') ;
+   #  if PAR.microexon,
+   #    if PAR.LOCAL_ALIGN
+   #      %local version
+   #      load(sprintf('%s/microexon_test_data_cut_local_ip=%1.3f_dp=%1.3f_mp=%1.3f.mat', ...
+   #         genome_info.basedir, PAR.insertion_prob, PAR.deletion_prob, PAR.mutation_prob), ...
+   #      'TestEsts', 'TestExon', 'Test', 'TestAcc', 'TestDon') ;
+   #    else
+   #      %global version
+   #      load(sprintf('%s/microexon_test_data_cut_ip=%1.3f_dp=%1.3f_mp=%1.3f.mat', ...
+   #         genome_info.basedir, PAR.insertion_prob, PAR.deletion_prob, PAR.mutation_prob),  ...
+   #      'TestEsts', 'TestExon', 'Test','TestAcc', 'TestDon', 'TestNoise') ;
+   #      Noises    = TestNoise ; % substitution matrix
+   #      end
+   #      else
+   #    load(sprintf('%s/exons_test_ip=%1.3f_dp=%1.3f_mp=%1.3f.mat', ...
+   #       genome_info.basedir, PAR.insertion_prob, PAR.deletion_prob, PAR.mutation_prob), ...
+   #    'TestEsts', 'TestExon', 'Test', 'TestAcc', 'TestDon') ;
+   #  end
+   #  
+   #  Sequences = Test ;      % dna sequences
+   #  Acceptors = TestAcc ;   % acceptor scores
+   #  Donors    = TestDon ;   % donor scores
+   #  Exons     = TestExon ;  % exon boundaries
+   #  Ests      = TestEsts ;  % est sequences
+
+   return Sequences, Acceptors, Donors, Exons, Ests, Noises
diff --git a/python/qpalma.log b/python/qpalma.log
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/python/qpalma.py b/python/qpalma.py
new file mode 100644 (file)
index 0000000..71ab47a
--- /dev/null
@@ -0,0 +1,419 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+import subprocess
+import scipy.io
+from paths_load_data import *
+import pdb
+import numpy.matlib
+from set_param_palma import *
+from computeSpliceAlign import *
+
+"""
+A training method for the QPalma project
+
+Overall :
+   1. Load the data 
+   via paths_load_data
+
+   2. Create a QP using SIQP (paths_create_qp)
+   
+   3. Set initial params using set_param_palma -> palma_utils
+
+   4. computeSpliceAlign
+
+   5. compute_SpliceAlignLocal
+
+   6. computeSpliceWeights
+
+   7. myalign_local
+
+"""
+
+class Param:
+   
+   def __init__(self):
+      """ default parameters """
+
+      self.basedir = '/fml/ag-raetsch/share/projects/qpalma/elegans_palma'
+      self.MAX_MEM = 31000;
+      self.LOCAL_ALIGN = 1;
+      self.init_param = 1;
+      self.train_with_splicesitescoreinformation = 1;
+      self.train_with_intronlengthinformation = 1;
+      self.C = 0.001;
+      self.microexon = 0;
+      self.prob = 0;
+      self.organism = 'elegans'
+      self.expt = 'training'
+         
+      self.insertion_prob  = self.prob/3 ;
+      self.deletion_prob   = self.prob/3 ;
+      self.mutation_prob   = self.prob/3 ;
+
+
+#%basedir='/fml/ag-raetsch/share/projects/splicing/uta/' 
+#if ~exist('basedir','var')
+#  basedir='/fml/ag-raetsch/share/projects/palma/elegans/' ;
+#end
+#
+#%basedir='/fml/ag-raetsch/share/projects/palma/human/' ;
+
+
+class GenomeInfo:
+   """
+   GenomeInfo's init function replaces 
+   init_genome
+
+   """
+   
+   def __init__(self,gen_file):
+      """ load the info stored in genome.config """
+
+      self.est_fnames      = g_est_fnames
+      self.cdna_fnames     = g_cdna_fnames
+      self.flcdna_fnames=g_flcdna_fnames
+      self.contig_names=g_contig_names
+      self.flat_fnames=g_flat_fnames
+      self.fasta_fnames=g_fasta_fnames
+      self.annotation_fnames=g_annotation_fnames
+      self.alphabet=g_alphabet
+      self.basedir=g_basedir
+      self.max_intron_len = g_max_intron_len
+      self.min_intron_len = g_min_intron_len
+      self.merge_est_transcripts = g_merge_est_transcripts
+
+   
+def run():
+   ARGS = Param()
+
+   gen_file= '%s/genome.config' % ARGS.basedir
+
+   cmd = ['']*4
+   cmd[0] = 'addpath /fml/ag-raetsch/home/fabio/svn/tools/utils'
+   cmd[1] = 'addpath /fml/ag-raetsch/home/fabio/svn/tools/genomes'
+   cmd[2] = 'genome_info = init_genome(\'%s\')' % gen_file
+   cmd[3] = 'save genome_info.mat genome_info'  
+   full_cmd = "matlab -nojvm -nodisplay -r \"%s; %s; %s; %s; exit\"" % (cmd[0],cmd[1],cmd[2],cmd[3])
+
+   obj = subprocess.Popen(full_cmd,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
+   out,err = obj.communicate()
+   assert err == '', 'An error occured!\n%s'%err
+
+   ginfo = scipy.io.loadmat('genome_info.mat')
+   genome_info = ginfo['genome_info']
+
+   print 'genome_info.basedir is %s\n'%genome_info.basedir
+
+   Sequences, Acceptors, Donors, Exons, Ests, Noises = paths_load_data('training',genome_info,ARGS)
+
+   #############################################################################################
+   # Constants 
+   #############################################################################################
+
+   # number of training instances
+   N = len(Sequences) 
+   print 'Number of training examples: %d\n'% N
+
+   random_N = 100 ; # number of constraints to generate per iteration
+   iteration_steps = 200 ; #upper bound on iteration steps
+
+   # myalign
+   remove_duplicate_scores = 0
+   anzpath = 2
+   print_matrix = 0
+
+   param = numpy.matlib.rand(126,1)
+
+   [h,d,a,mmatrix] = set_param_palma(param,ARGS.train_with_intronlengthinformation)
+
+   #import palma.model
+   #currentModel = palma.model.parse_file(modelFile)
+
+   #import palma_utils
+   #h,d,a,mmatrix = palma_utils.set_param(currentModel)
+
+   #############################################################################################
+   # delete splicesite-score-information
+   #############################################################################################
+
+   #if ARGS.train_with_splicesitescoreinformation:
+   #   for i in range(len(Acceptors)):
+   #    Acceptors[i](Acceptors[{i]>-20)=1 ;
+   #    Donors{i}(Donors{i}>-20)=1 ;
+   #  end 
+   #end
+
+   #############################################################################################
+   # Training
+   #############################################################################################
+   print 'Starting training...'
+
+      #The alignment matrix M looks like 
+
+      #        DNA
+
+      #      A C G T -
+      #   ------------
+      #   A
+      # 
+      #E  C
+      #S
+      #T  G
+
+      #   T
+
+      #   - 
+
+
+      #where 
+
+      #   - is a gap, 
+
+
+   numDonorSupportPoints         = 30
+   numAcceptorSupportPoints      = 30
+   numIntronLengthSupportPoints  = 30 
+   numQualityScoreSupportPoints  =  0
+
+   numFeatures = numDonorSupportPoints\
+               + numAcceptorSupportPoints\
+               + numIntronLengthSupportPoints\
+               + numQualityScoreSupportPoints
+
+   numExamples = N
+
+   from SIQP_CPX import SIQPSolver
+   C=1.0
+   
+   logfile = open('qpalma.log','w+')
+   #problem = SIQPSolver(numFeatures,numExamples,C,logfile)
+
+   #xis = zeros(1,N) ; #ninitialize slack variables
+   #num_path = anzpath*ones(1,N) ; # nr of alignments done (best path, second-best path etc.)
+   #gap = zeros(1,N) ; %? sum of differences between true alignment score and 'false' alignment scores
+
+
+   #for step_nr=1:iteration_steps,
+   iteration_nr = 1
+
+   while True:
+      print 'Iteration step: %d'% iteration_nr
+
+      for exampleId in range(numExamples):
+         if exampleId % 1000 == 0:
+            print 'Current example nr %d' % exampleId
+         dna = Sequences[exampleId] 
+         est = Ests[exampleId] 
+
+         exons = Exons[exampleId] 
+         # NoiseMatrix = Noises[exampleId] 
+         don_supp = Donors[exampleId] 
+         acc_supp = Acceptors[exampleId] 
+
+
+         # Berechne die Parameter des wirklichen Alignments (but with untrained d,a,h ...)    
+         true_SpliceAlign, true_weightMatch = computeSpliceAlign(dna, exons)
+
+         pdb.set_trace()
+
+         true_weightDon, true_weightAcc, true_weightIntron = computeSpliceWeights(d, a, h, true_SpliceAlign, don_supp, acc_supp)
+      
+      
+      iteration_nr += 1
+      break
+
+   """
+     for id = 1:N  
+       %fprintf('%i\n', id) ;
+       if (mod(id,50)==0), fprintf(1,'.'); end
+          
+      
+       %some assertions concerning the splicesites
+       idx_don = find(don_supp~=-Inf) ;
+       assert(all(dna(idx_don)=='g')) ;
+       assert(all(dna(idx_don(idx_don<length(dna))+1)=='t' | dna(idx_don(idx_don<length(dna))+1)=='c')) ;
+       idx_acc = find(acc_supp~=-Inf) ;
+       assert(all(dna(idx_acc(idx_acc>3)-2)=='a')) ;
+       assert(all(dna(idx_acc(idx_acc>2)-1)=='g')) ;
+        
+       %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+       % True Alignment and Comparison with wrong ones
+       %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+       
+       %Berechne die Parameter des wirklichen Alignments (but with untrained d,a,h ...)    
+       [true_SpliceAlign, true_weightMatch] = compute_SpliceAlign_local(dna, exons) ;
+       [true_weightDon, true_weightAcc, true_weightIntron] = ...
+           compute_SpliceWeights(d, a, h, true_SpliceAlign, don_supp, acc_supp) ;
+       
+       double(true_weightMatch) ;
+       
+       % Berechne Gewichtsmatrix fuer aktuelle id (matrix anlegen)
+       Weights = zeros(num_path(id)+1, 126) ; %first line: true alignment, rest: wrong alignments
+       Weights(1,:) = [true_weightIntron, true_weightDon, true_weightAcc, true_weightMatch] ;
+       
+       %Score of whole alignment
+       AlignmentScores = zeros(1, num_path(id)+1) ; %first entry: true alignment
+       AlignmentScores(1) = Weights(1,:) * [h.penalties' ; d.penalties' ; a.penalties' ; mmatrix(:)] ;
+      
+       %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+       % Wrong Alignments
+       %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+       % Compute donor, acceptor with penalty_lookup_new
+       [donor, acceptor] = compute_donacc(don_supp, acc_supp, d, a) ;
+       
+       %myalign wants the acceptor site on the g of the ag
+       acceptor = [acceptor(2:end) -Inf] ;
+         
+       %call myalign    
+       [SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest] = ...
+           myalign_local(num_path(id), dna, est, {h}, mmatrix, donor, acceptor, ...
+                         remove_duplicate_scores, print_matrix);
+       
+       %[SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest] = myalign_local(1, dna, est, {h}, mmatrix, donor,
+       %acceptor, remove_duplicate_scores, print_matrix);
+       
+       %return values (are int32, have to be double later cause we compute scores
+       SpliceAlign = double(SpliceAlign') ; %column
+       weightMatch = double(weightMatch') ;
+
+
+       %test
+       for pfadNo = 1:num_path(id)
+         assert(sum(weightMatch(pfadNo,7:end)) == sum(SpliceAlign(pfadNo,:)==0)) ;
+         new_weightMatch = zeros(1,36) ;
+         for iii = 1:length(dnaest{1,pfadNo})
+           if dnaest{2,pfadNo}(iii) ~= 6
+             new_weightMatch(dnaest{1,pfadNo}(iii)*6 + dnaest{2,pfadNo}(iii) + 1) = new_weightMatch(dnaest{1,pfadNo}(iii)*6 + dnaest{2,pfadNo}(iii)+1) + 1 ;
+           end
+         end
+         assert(all(new_weightMatch == weightMatch(pfadNo,:))) ;
+         assert(sum(new_weightMatch(7:end)) == sum(SpliceAlign(pfadNo,:)==0)) ;
+         %reshape(new_weightMatch,6,6)
+         %reshape(weightMatch(pfadNo,:),6,6)
+       end
+       
+       %assert that it is the right file 
+       assert(all(dna(find(SpliceAlign(1,:)==1)) == 'g')) ;
+       
+       %just some info
+       %print_align(dnaest,1) ;
+       %exons(3,2)-exons(1,1)
+       %length(dnaest{1,1})
+       %keyboard
+       
+       %Berechne Gewichte der durch Alignment berechneten Pfade    
+       true_map = zeros(1,1+num_path(id)) ;
+       true_map(1) = 1 ;
+       path_loss=[] ;
+       path_loss(1) = 0 ;
+       for pfadNo = 1:num_path(id)
+         dna_numbers = dnaest{1,pfadNo} ;
+         est_numbers = dnaest{2,pfadNo} ;
+           
+         [weightDon, weightAcc, weightIntron] = ...
+             compute_SpliceWeights(d, a, h, SpliceAlign(pfadNo,:), don_supp, acc_supp) ;
+         
+         path_loss(pfadNo+1) = sum(double(SpliceAlign(pfadNo,:))~=true_SpliceAlign) ; %not too simple?
+         
+         %Gewichte in restliche Zeilen der Matrix speichern
+         Weights(pfadNo+1,:) = [weightIntron, weightDon, weightAcc, weightMatch(pfadNo,:)] ;
+         
+         AlignmentScores(pfadNo+1) = Weights(pfadNo+1,:) * [h.penalties' ; d.penalties' ; a.penalties' ; mmatrix(:)] ;
+         
+         %Test, ob Alignprogr. auch das richtige Ergebnis liefert:
+         assert(abs(Gesamtscores(pfadNo) - AlignmentScores(pfadNo+1)) < 1e-6) ;
+        
+         if norm(Weights(1,:)-Weights(pfadNo+1,:))<1e-5,
+           true_map(pfadNo+1)=1 ;
+         end 
+       end
+          
+       % the true label sequence should not have a larger score than the
+       % maximal one WHYYYYY?
+       if AlignmentScores(1) >= max(AlignmentScores(2:end))+1e-6,
+         AlignmentScores
+         warning('true score > max score\n') ;
+         keyboard
+       end ;
+       
+       %%%set_param_palma should have done this right
+       for z = 1:num_path(id)
+         assert(abs(Weights(z,:) * param(1:126)' -AlignmentScores(z)) <= 1e-6) ; %abs: absolute value
+       end
+       
+       if all(true_map==1)
+         num_path(id)=num_path(id)+1 ; %next iteration step: one alignment more
+       end ;
+      
+       %Choose true and first false alignment for extending A
+       Weights = Weights([1 min(find(true_map==0))], :) ; 
+       
+       %if there is a false alignment
+       if size(Weights,1)>1 & sum((Weights(1,:)-Weights(2,:)).*param)+xis(id)<1+column_eps,
+         e=zeros(1,N) ; 
+         e(id) = 1 ;
+         A=[A;
+            Weights(2,:)-Weights(1,:) zeros(1,126) -e] ;
+         b=[b;
+            -1] ;
+         gap(id) = 1-sum((Weights(1,:)-Weights(2,:)).*param)-xis(id) ;
+       else
+         gap(id) = 0 ;
+       end ;
+     end       
+     fprintf('\n') ;
+
+
+
+      const_added = solver.addConstraint(deltas, loss, pos, marginRescaling)
+
+      objValue,w,self.slacks = solver.solve()
+     
+     # [param,xis]  = paths_qp_solve(lpenv,Q,f,A,b,LB,UB);
+     # sum_xis = sum(xis)
+     # maxgap=max(gap) 
+     [mean(xis>=1) mean(gap>=1) mean(xis>=1e-3) mean(gap>=1e-3)]
+
+     [h,d,a,mmatrix] = set_param_palma(param,ARGS.train_with_intronlengthinformation) ;
+
+     save(paramfilename, ...
+          'param', 'h', 'd', 'a', 'mmatrix', 'A', 'b') ;
+
+      if max(gap)<=column_eps:
+         break
+     
+
+   assert(all(maxgap<=column_eps))
+   assert(length(take_idx)==N)
+   """
+   print 'Training completed'
+
+if __name__ == '__main__':
+   run()
+
+   """
+
+      def __init__(self):
+         numAcceptorParams = 0
+         numDonorParams = 0
+         numIntronLengthParams = 30
+         
+         # how many supporting points do we have for the 
+         # quality function per base 
+         numQualityParamsPerBase = 5
+
+         # we have quality params for each possible 
+         # for all possible combinations of 
+         # {A,C,G,T} x {A,C,G,T,-}
+         # 4 x 5
+         numQualityParams = numQualityParamsPerBase * 4 * 5
+
+         # for all possible combinations of 
+         # {A,C,G,T} x {A,C,G,T,-,*} plus
+         # {-} x {A,C,G,T,-}
+         # 1 x 5
+         numParams = numQualityParams + 1*5
+   """
diff --git a/python/set_param_palma.py b/python/set_param_palma.py
new file mode 100644 (file)
index 0000000..39eb949
--- /dev/null
@@ -0,0 +1,140 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import math
+import numpy.matlib
+
+def linspace(a,b,n):
+   intervalLength = b-a
+   stepSize = intervalLength / n
+   
+   interval = [0]*n
+   for i in range(n):
+      interval[i] = a+i*stepSize
+
+   return interval
+
+  
+def logspace(a,b,n):
+   interval = [0]*n
+   intervalSize = b-a
+
+   interval[n-1] = b
+
+   for i in range(n-2,0,-1):
+      interval[i] = interval[i+1] / math.e
+
+   return interval
+
+class Plf: #means piecewise linear function
+   len = 0
+   limits = []
+   penalties = []
+   transform = ''
+   name = ''
+   max_len = 0
+   min_len = 0
+
+def set_params_pa():
+   h = plf()
+   h.len = int(model.intron_len_bins) 
+   h.limits = model.intron_len_limits 
+   h.penalties = model.intron_len_penalties 
+   h.name = 'h'
+   h.max_len   = int(max_intron_len)
+   h.min_len   = int(min_intron_len)
+   h.transform = model.intron_len_transform
+
+   d = plf()
+   d.len = int(model.donor_bins)
+   d.limits = model.donor_limits
+   d.penalties = model.donor_penalties 
+   d.name = 'd'
+   d.max_len   = 100
+   d.min_len   = -100
+
+   a = plf()
+   a.len = int(model.acceptor_bins)
+   a.limits = model.acceptor_limits 
+   a.penalties = model.acceptor_penalties 
+   a.name = 'a'
+   a.max_len   = 100
+   a.min_len   = -100
+
+   mmatrix = model.substitution_matrix 
+
+
+def set_param_palma(param, train_with_intronlengthinformation,\
+                                               min_intron_len=None, max_intron_len=None, min_svm_score=None, max_svm_score=None):
+
+   print 'Setting parameters ...'
+   
+   if min_intron_len == None:
+      if train_with_intronlengthinformation:
+         min_intron_len=20
+         max_intron_len=1000
+      else:
+         min_intron_len = 1
+         max_intron_len = 2
+
+   if min_intron_len != None and max_intron_len != None:
+      min_svm_score=-5
+      max_svm_score=5
+
+   
+   h = Plf()
+   d = Plf()
+   a = Plf()
+
+   ####################
+   # Gapfunktion
+   ####################
+   h.limits    = logspace(math.log(min_intron_len,10),math.log(max_intron_len,10),30) 
+   h.penalties = param[1:30] 
+   #h.transform = '+1' 
+   h.transform = '' 
+   h.name      = 'h' 
+   h.max_len   =  100000 
+   h.min_len   =  4 
+   h.id        = 1 
+   h.use_svm   = 0 
+   h.next_id   = 0 
+
+
+   ####################
+   # Donorfunktion
+   ####################
+   d.limits    = linspace(min_svm_score,max_svm_score,30) 
+   d.penalties = param[31:60]
+   #d.transform = '+1' 
+   d.transform = '' 
+   d.name      = 'd' 
+   d.max_len   = 100 
+   d.min_len   = -100 
+   d.id        = 1 
+   d.use_svm   = 0 
+   d.next_id   = 0 
+
+
+   ####################
+   # Acceptorfunktion
+   ####################
+   a.limits    = linspace(min_svm_score,max_svm_score,30) 
+   a.penalties = param[61:90]
+   #a.transform = '+1' 
+   a.transform = '' 
+   a.name      = 'a' 
+   a.max_len   = 100  
+   a.min_len   = -100  
+   a.id        = 1 
+   a.use_svm   = 0 
+   a.next_id   = 0 
+
+
+   ####################
+   # Matchmatrix
+   ####################
+   mmatrix = numpy.matlib.mat(param[90:126])
+   mmatrix.reshape(6,6) 
+
+   return h,d,a,mmatrix