--- /dev/null
+#!/usr/bin/env python
+# -*- coding:latin-1 -*-
+
+import sys
+import os
+import os.path
+import logging
+import math
+import pdb
+
+import cvxopt.base as cb
+
+import logging
+logging.basicConfig(level=logging.DEBUG,format='%(levelname)s %(message)s')
+
+def asymCoords(t):
+ size_upstream = t[0]
+ size_downstream = t[1]
+ assert size_upstream >= 1 and size_upstream <= 39
+ assert size_downstream >= 1 and size_downstream <= 39
+
+ if size_downstream > size_upstream:
+ asym_small = size_upstream
+ asym_big = size_downstream
+ else:
+ asym_small = size_downstream
+ asym_big = size_upstream
+
+ asym_offset = 0
+ for r in range(asym_small):
+ asym_offset += (40-r)-r;
+
+ return (asym_offset+asym_big)-1
+
+def calcTuples(i,j):
+ return filter(lambda t: t[0]<=t[1] and t[0]>0 and t[0]<40 and t[1]>0 and t[1]<40,[(i+1,j),(i-1,j),(i,j+1),(i,j-1)])
+
+class SIQP:
+ """
+ This class is a wrapper for the cvxopt qp solver.
+ It has the purpose to support an iterative addition of
+ constraints to the original problem.
+
+ We want to solve a problem of the form
+
+ min 1/2 * C * x' * P * x + q' * x
+ x
+
+ s.t.
+ < w,f+ > - < w,f- > >= 1 - xi_i for all i, for all f-
+
+ where x is a vector storing information on the parameters w_i and the slacks xi_i
+ so x = [w;xi]
+
+ """
+
+ def __init__(self,fSize,numExamples,c):
+ assert fSize > 0, 'You have to have at least one feature!'
+ assert numExamples > 0, 'You have to have at least one example!'
+ self.numFeatures = fSize
+ self.numExamples = numExamples
+ self.C = c
+ self.EPSILON = 10e-15
+
+ logging.debug("Starting SIQP solver with %d examples. A feature space dim. of %d.", numExamples,fSize)
+ logging.debug("Regularization parameters are C=%f."%(self.C))
+
+ self.old_w = None
+
+ self.dimP = self.numFeatures + self.numExamples
+ self.createRegularizer()
+
+ def parseParameterFile(self):
+ paramFile = 'Parameters.h'
+ paramFileLoc = os.path.join('/fml/ag-raetsch/home/fabio/projects/rfp_current',paramFile)
+
+ self.featMap = {
+ 'BASE_PAIR':0,
+ 'HELIX_CLOSING':1,
+ 'STACKING_PAIR':2,
+ 'STACKING_SCORE':3,
+ 'TERM_MIS':4,
+ 'HAIRPIN':5,
+ 'BULGE':6,
+ 'INTERNAL_LENGTH':7,
+ 'INTERNAL_FULL':8,
+ 'INTERNAL_ASYM':9,
+ 'SINGLE_LEFT':10,
+ 'SINGLE_RIGHT':11 }
+
+ # ('STACKING_PAIR_AAAA','STACKING_PAIR_UUUU')
+ self.start_stops = [
+ ('BASE_PAIR_AA','BASE_PAIR_UU'),
+ ('HELIX_CLOSING_AA','HELIX_CLOSING_UU'),
+ ('STACKING_PAIR_AAAA','STACKING_PAIR_UUUU'),
+ ('STACKING_1_SCORE','STACKING_5_SCORE'),
+ ('TERMINAL_MISMATCH_AAAA','TERMINAL_MISMATCH_UUUU'),
+ ('HAIRPIN_LOOP_1','HAIRPIN_LOOP_30'),
+ ('BULGE_LOOP_1','BULGE_LOOP_30'),('INTERIOR_LOOP_1','INTERIOR_LOOP_30'),
+ ('INTERNAL_LENGTH_1','INTERNAL_LENGTH_30'),
+ ('INTERNAL_FULL_1_1','INTERNAL_FULL_15_15'),
+ ('INTERNAL_ASYMMETRY_1','INTERNAL_ASYMMETRY_30'),
+ ('SINGLE_BASE_PAIR_STACKING_LEFT_AAA','SINGLE_BASE_PAIR_STACKING_LEFT_UUU'),
+ ('SINGLE_BASE_PAIR_STACKING_RIGHT_AAA','SINGLE_BASE_PAIR_STACKING_RIGHT_UUU') ]
+
+ self.param_pos = [(0,0)]*len(self.featMap)
+
+ for line in open(paramFileLoc):
+ if line.startswith('#define') and not line.startswith('#define _'):
+ line = line.replace('#define','').strip()
+ name,value = line.split(' ')
+ value = int(value)
+
+ for key,id in self.featMap.items():
+ if name == self.start_stops[id][0]:
+ self.param_pos[id] = (value,0)
+ if name == self.start_stops[id][1]:
+ a,b = self.param_pos[id]
+ self.param_pos[id] = (a,value)
+
+ self.param_ranges = [(0,0)]*len(self.featMap)
+ self.param_coeff = [1.0]*len(self.featMap)
+
+ self.param_coeff = [
+ 0.33, 2.5, 1.0, 1.0,
+ 1.5, 0.5, 5.0, 3.0,
+ 2.5, 5.0, 2.5, 2.0]
+
+ for key,id in self.featMap.items():
+ self.param_ranges[id] = range(self.param_pos[id][0],self.param_pos[id][1])
+
+ def regularizeTogether(self,pos1,pos2):
+ self.P[pos1,pos2] = -1.0
+ self.P[pos2,pos1] = -1.0
+ #self.P[pos1,pos1] += 1.0
+
+ def createRNARegularizer(self):
+ self.parseParameterFile()
+
+ self.loopRanges = [ self.param_ranges[self.featMap['STACKING_SCORE']], self.param_ranges[self.featMap['HAIRPIN']],
+ self.param_ranges[self.featMap['BULGE']],self.param_ranges[self.featMap['INTERNAL_LENGTH']]]
+
+ self.P = cb.matrix(0.0,(self.dimP,self.dimP))
+
+ # first create unit matrix (only upper block)
+ for i in range(self.numFeatures):
+ self.P[i,i] = 1.0
+
+ # now add terms for loop smoothness constraints
+ #for currentRange in self.loopRanges:
+ # for i in currentRange:
+ # self.P[i,i+1] = -1.0
+ # self.P[i+1,i] = -1.0
+ # self.P[i,i] += 1.0
+
+ b0,b1,b2,b3 = 1,4,16,64
+ posList = []
+
+ for i in range(4):
+ for j in range(4):
+ for k in range(4):
+ for l in range(4):
+
+ index1 = i*b3 + j*b2 + k*b1 + l*b0
+ index2 = l*b3 + k*b2 + j*b1 + i*b0
+
+ if index1 > index2:
+ continue;
+
+ posList.append((i,j,k,l))
+
+ stacking_start = (self.param_pos[self.featMap['STACKING_PAIR']])[0]
+
+ for pos1,elem1 in enumerate(posList):
+ for pos2,elem2 in enumerate(posList):
+ if elem1 == elem2:
+ continue
+
+ i,j,k,l = elem1
+ ip,jp,kp,lp = elem2
+
+ if (k,l,i,j) == (ip,jp,kp,lp):
+ print stacking_start+pos1,stacking_start+pos2
+ #self.regularizeTogether(stacking_start+pos1,stacking_start+pos2)
+
+ # Scale regularization terms
+ for key,id in self.featMap.items():
+ currentRange = self.param_ranges[id]
+ currentCoeff = self.param_coeff[id]
+
+ self.P[currentRange,currentRange] *= currentCoeff
+
+ for i in range(self.numFeatures,self.dimP):
+ assert self.P[i,i] == 0.0
+
+ def createUnitRegularizer(self):
+ # set regularizer to zero
+ self.P = cb.matrix(0.0,(self.dimP,self.dimP))
+ for i in range(self.numFeatures):
+ self.P[i,i] = 1.0
+ # end of zeroing regularizer
+
+ def createRegularizer(self):
+ self.createUnitRegularizer()
+ #self.createRNARegularizer()
+
+ q_array = [0.0]*self.numFeatures + [1.0]*self.numExamples
+ self.q = cb.matrix(q_array,(self.numFeatures+self.numExamples,1),'d')
+ self.q = self.C * self.q
+
+ self.G = cb.matrix(0.0,(self.numExamples,self.numFeatures+self.numExamples))
+ for i in range(self.numExamples):
+ self.G[i,self.numFeatures+i] = -1
+
+ self.h = cb.matrix(0,(self.numExamples,1),'d')
+
+if __name__ == '__main__':
+ sq = SIQP(3,2,100.0)
+
+ """
+ function [C_qp_penalties,C_qp_smoothness,C_lp_smoothness,C_qp_smallness,column_eps,Q,f,LB,UB] = paths_create_qp(N,C)
+ % function [C_qp_penalties,C_qp_smoothness,C_lp_smoothness,C_qp_smallness,column_eps,Q,f,LB,UB] = paths_create_qp(N,C)
+
+ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ % qp_solve: min ( <res, f'> + 1/2 res' Q res)
+ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ C_qp_penalties = C ; %was 0.01
+ C_qp_smoothness = C ; %was 0.01
+ C_lp_smoothness = 0.00 ; %linear problem parameters
+ C_qp_smallness = 1e-6 ; %was 1e-6
+ column_eps = 1e-3 ;
+
+ Q=zeros(2*126+N) ; % 1/2 * res' * Q * res
+ Q(1:90,1:90) = C_qp_smallness*diag(ones(1,90)) ;
+ Q(91:126,91:126) = C_qp_penalties*diag(ones(1,36)) ;
+ Q(127:2*126,127:2*126) = C_qp_smoothness*diag(ones(1,126)) ;
+ f = [zeros(1,126) C_lp_smoothness*ones(1,126) ones(1,N)/N] ; % <res, f'>
+
+
+
+ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ % qp_solve: LB <= res <= UB
+ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ INF = 1e20 ;
+ LB = [-INF*ones(1,126) zeros(1,126) zeros(1,N)] ; % lower bound for res
+ UB = [ INF*ones(1,2*126) INF*ones(1,N)] ; % upper bound for res
+
+ """
--- /dev/null
+#!/usr/bin/env python
+# -*- coding:latin-1 -*-
+
+import sys
+import os
+import os.path
+import logging
+import math
+
+import cvxopt.base as cb
+
+import logging
+logging.basicConfig(level=logging.DEBUG,format='%(levelname)s %(message)s')
+
+import CPX
+import numpy as ny
+import numpy.matlib as nyml
+from cplex_const import *
+
+from SIQP import SIQP
+
+OPTIMAL = [CPX_STAT_OPTIMAL, CPXMIP_OPTIMAL, CPXMIP_OPTIMAL_TOL]
+UNBOUNDED = [CPX_STAT_UNBOUNDED, CPXMIP_UNBOUNDED]
+INFEASIBLE = [CPX_STAT_INFEASIBLE, CPXMIP_INFEASIBLE, CPXMIP_INForUNBD]
+
+class FloatMatrix:
+
+ TYPE = nyml.float64
+
+ def __init__(self,list):
+ self.data = nyml.mat(list, dtype=self.TYPE).T
+
+class IntMatrix:
+
+ TYPE = nyml.int32
+
+ def __init__(self,list):
+ self.data = nyml.mat(list, dtype=self.TYPE).T
+
+class CharMatrix:
+
+ TYPE = '|S1'
+
+ def __init__(self,list):
+ self.data = nyml.mat(list, dtype=self.TYPE).T
+
+class SIQPSolver(SIQP):
+ """
+ This algorithm solves the problem
+
+ min 1/2 x'Px + q'x
+ s.t.
+ Gx >= h
+ """
+
+ # Define types and constants
+ Inf = CPX_INFBOUND
+
+ def __init__(self,fSize,numExamples,c,proto):
+ SIQP.__init__(self,fSize,numExamples,c)
+ self.numFeatures = fSize
+ self.numExamples = numExamples
+ self.numVariables = self.numFeatures + self.numExamples
+ self.old_w = None
+ self.old_objective_value = -(sys.maxint-1)
+
+ self.addedConstraints = 0
+
+ self.protocol = proto
+
+ self.objsen = CPX_MIN
+
+ # open CPLEX environment, set some parameters
+ self.env = CPX.openCPLEX()
+
+ CPX.setintparam(self.env, CPX_PARAM_SCRIND, CPX_ON) # print >> self.protocol, info to screen
+ CPX.setintparam(self.env, CPX_PARAM_DATACHECK, CPX_ON)
+
+ # create CPLEX problem, add objective and constraints to it
+ self.lp = CPX.createprob(self.env, 'test1')
+
+ self.obj = FloatMatrix([0.0]*self.numFeatures + [1.0]*self.numExamples).data
+ cFactor = 1.0*self.C/self.numExamples
+ self.obj = cFactor * self.obj
+ assert self.obj.shape == (self.numVariables,1)
+
+ self.ub = FloatMatrix([self.Inf] * self.numVariables).data
+ self.lb = FloatMatrix([-self.Inf] * self.numFeatures + [0.0]*self.numExamples).data
+
+ self.matbeg = IntMatrix([1]*self.numVariables).data
+ self.matbeg[0] = 0
+
+ self.matcnt = IntMatrix([0]*self.numVariables).data
+ self.matcnt[0] = 1
+
+ self.matind = IntMatrix([0]).data
+ self.matval = FloatMatrix([0.0]).data
+
+ self.rhs = FloatMatrix([0.0]*self.numVariables).data
+ self.sense = CharMatrix(['G']*self.numVariables).data
+
+ self.numConstraints = 1
+
+ CPX.copylp(self.env, self.lp, self.numVariables, self.numConstraints, self.objsen,
+ self.obj, self.rhs, self.sense,
+ self.matbeg, self.matcnt, self.matind, self.matval,
+ self.lb, self.ub)
+
+ self.qmatbeg, self.qmatcnt, self.qmatind, self.qmatval = self.cpx_matrix(self.P)
+ CPX.copyquad(self.env, self.lp, self.qmatbeg, self.qmatcnt, self.qmatind, self.qmatval)
+
+ def addConstraint(self, energy_deltas, loss, example_idx, useMarginRescaling):
+ """
+ This method adds one contraint to the optimization problem.
+ """
+ self.lastDelta = energy_deltas
+ self.lastLoss = loss
+ self.lastIndex = example_idx
+
+ assert loss >= 0.0, 'Loss has to be non-negative!'
+ assert -1 < example_idx and example_idx < self.numExamples
+
+ #if loss < self.EPSILON:
+ # return False
+
+ #if self.old_w != None:
+ # scalar_prod = self.old_w[0:self.numFeatures,0].trans() * energy_deltas
+ # old_slack = self.old_w[self.numFeatures+example_idx,0]
+ # if useMarginRescaling:
+ # if scalar_prod[0] + old_slack > loss: # leave some room for inaccuracies
+ # print 'Current example already fulfills constraint.'
+ # print >> self.protocol, 'Current example already fulfills constraint.'
+ # return False
+
+ # printing out respective loss resp. scalar prod contributions
+ if self.old_w != None:
+ print >> self.protocol, 'Loss contrib is %d'%loss
+ print >> self.protocol, 'Scalar prod. contrib is %f'%((self.old_w[0:self.numFeatures].T*energy_deltas)[0])
+
+ numNewRows = 1
+ numNewCols = 0
+
+ row_rhs = FloatMatrix([loss]).data
+ row_sense = CharMatrix(['G']).data
+
+ row_matbeg = IntMatrix([0]).data
+
+ row_entries = [0.0]*self.numVariables
+ indices = [0]*self.numVariables
+
+ currentVal = 0.0
+ numNonZeroElems = 0
+ for idx in range(self.numFeatures):
+ currentVal = energy_deltas[idx]
+ if math.fabs(currentVal) > self.EPSILON:
+ row_entries[numNonZeroElems] = currentVal
+ indices[numNonZeroElems] = idx
+ numNonZeroElems += 1
+
+ row_matind = IntMatrix([0]*(numNonZeroElems+1)).data
+ row_matval = FloatMatrix([0.0]*(numNonZeroElems+1)).data
+
+ for pos in range(numNonZeroElems):
+ row_matind[pos] = indices[pos]
+ row_matval[pos] = row_entries[pos]
+
+ # coefficient 1.0 of xi
+ row_matind[numNonZeroElems] = self.numFeatures+example_idx
+ row_matval[numNonZeroElems] = 1.0
+
+ status = CPX.addrows (self.env, self.lp, numNewCols, numNewRows, numNonZeroElems+1, row_rhs,
+ row_sense, row_matbeg, row_matind, row_matval)
+
+ assert status == 0, 'Error ocurred while adding constraint.'
+
+ #CPX.writeprob(self.env,self.lp,"problem_%d.SAV"%self.addedConstraints)
+ #CPX.writeprob(self.env,self.lp,"problem_%d.LP"%self.addedConstraints)
+ self.addedConstraints += 1
+
+ return True
+
+ def solve(self):
+ # solve the problem
+ CPX.qpopt(self.env,self.lp)
+
+ # get the solution
+ lpstat = CPX.getstat(self.env,self.lp)
+ if lpstat in OPTIMAL or lpstat == 6:
+ x, objval = CPX.getx(self.env,self.lp), CPX.getobjval(self.env,self.lp)
+ print >> self.protocol, 'SIQP_CPX: Objective is: %f'%objval
+
+ # Check that objective value is monotonically increasing
+ if math.fabs(objval - self.old_objective_value) >= 10e-5:
+ assert objval >= self.old_objective_value
+
+ self.old_objective_value = objval
+
+ new_w = cb.matrix([0.0]*self.numVariables,(self.numVariables,1))
+ for i in range(self.numVariables):
+ new_w[i,0] = x[i]
+
+ self.old_w = new_w
+
+ # Check for non-negative slacks
+ sumOfSlacks = 0.0
+ print >> self.protocol, 'Slacks are:'
+ for i in range(self.numExamples):
+ print >> self.protocol, '%f ' % new_w[self.numFeatures+i,0]
+ assert new_w[self.numFeatures+i,0] >= 0.0
+ sumOfSlacks += new_w[self.numFeatures+i,0]
+
+ scalarProdDiff = (new_w[0:self.numFeatures,0].T * self.lastDelta)[0]
+ print >> self.protocol, 'Constraint: %f >= %d - %f'%(scalarProdDiff,self.lastLoss,new_w[self.numFeatures+self.lastIndex])
+
+ # evalute parts of the objective
+ regularizationPart = (0.5 *new_w.T*self.P*new_w)[0]
+ lossPart = 1.0*self.C/self.numExamples * sumOfSlacks
+ assert lossPart >= 0.0, 'Error we have a negative loss contribution'
+ print >> self.protocol, 'Parts of objective: %e %e'%(regularizationPart,lossPart)
+
+ return objval,new_w[0:self.numFeatures,0],new_w[self.numFeatures:,0]
+
+ elif lpstat in UNBOUNDED:
+ print >> self.protocol, "Solution unbounded"
+ assert False
+ elif lpstat in INFEASIBLE:
+ print >> self.protocol, "Solution infeasible"
+ assert False
+ else:
+ print >> self.protocol, "Solution code: ", lpstat
+ assert False
+
+ def cpx_matrix(self,M):
+ """
+ This method creates a sparse CPLEX representation of a given
+ cvxopt Matrix M.
+ """
+
+ nRows,nCols = M.size
+ numEntries = nRows*nCols
+
+ matbeg = IntMatrix([0]*nCols).data
+ matcnt = IntMatrix([0]*nCols).data
+
+ nonZeros = 0
+ elemCounter = 0
+ entry = 0.0
+
+ valVec = [0.0]*numEntries
+ indVec = [0.0]*numEntries
+
+ for currentCol in range(nCols):
+ nonZeros = 0
+ matbeg[currentCol] = elemCounter
+
+ for currentRow in range(nRows):
+ entry = M[currentRow,currentCol]
+ if math.fabs(entry) > self.EPSILON:
+ nonZeros += 1
+ indVec[elemCounter] = currentRow
+ valVec[elemCounter] = entry
+ elemCounter += 1
+
+ matcnt[currentCol] = nonZeros
+
+ totalNonZeros = elemCounter
+
+ matind = IntMatrix([0]*totalNonZeros).data
+ matval = FloatMatrix([0.0]*totalNonZeros).data
+
+ for idx in range(totalNonZeros):
+ matind[idx] = indVec[idx];
+ matval[idx] = valVec[idx];
+
+ print >> self.protocol, 'nRows,nCols: %d,%d'%(nRows,nCols)
+ print >> self.protocol, 'totalNonZeros ' + str(totalNonZeros)
+
+ return matbeg,matcnt,matind,matval
+
+ def cpx_matrix_id(self):
+ """
+ This method creates a sparse CPLEX representation of a given
+ cvxopt Matrix M.
+ """
+
+ nRows,nCols = self.numVariables,self.numVariables
+ matbeg = IntMatrix([0]*nCols).data
+ matcnt = IntMatrix([0]*nCols).data
+
+ matind = IntMatrix([0]*nCols).data
+ matval = FloatMatrix([0.0]*nCols).data
+
+ for i in range(nCols):
+ matbeg[i] = i
+ matcnt[i] = 1
+ matind[i] = i
+ matval[i] = 1.0
+
+ return matbeg,matcnt,matind,matval
+
+ def delete(self):
+ # free the problem
+ CPX.freeprob(self.env,self.lp)
+ # close CPLEX environment
+ CPX.closeCPLEX(self.env)
+
+def test():
+ fh = open('siqp_cpx.log','w+')
+
+ numFeatures = 3
+ numExamples = 2
+ s = SIQPSolver(numFeatures,numExamples,10.0,fh)
+
+ d1 = cb.matrix([1,0,2],(numFeatures,1))
+ d2 = cb.matrix([1,1,0],(numFeatures,1))
+
+ s.addConstraint(d1,200.0,0,True)
+ s.addConstraint(d2,105.0,1,True)
+
+ w,slacks = s.solve()
+ print w
+ print slacks
+
+ print w.T * d1 + slacks[0]
+ print w.T * d2 + slacks[1]
+
+
+ s.delete()
+ del s
+ fh.close()
+
+if __name__ == '__main__':
+ test()
--- /dev/null
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from numpy.matlib import zeros
+import pdb
+
+
+def computeSpliceAlign(dna, exons):
+ """
+ Exonpos: Anfang Exon bis 1 rechts neben Ende Exon
+ Daraus folgt: SpliceposAnfang ist gleich Ende Exon von Vorgaenger
+ (ausser dem letzten und ersten)
+ und Ende Splicestelle ist gleich Anfang Exon vom naechsten, da Splicestelleende notiert 1 rechts neben Splciestellenende
+ bsp:
+ cccccA1cccccE1cccccA2 ... AmcccccEmcccccccc
+ cccccXXcccccA1cccccE1 ... Em-1cccXXcccccccc
+ """
+
+ numberOfExons = (exons.shape)[0] # how many rows ?
+ exonSizes = [-1]*numberOfExons
+
+
+ for idx in range(numberOfExons):
+ exonSizes[idx] = exons[idx,1] - exons[idx,0]
+
+ sizeMatchmatrix = 6 # -acgtn
+
+ # SpliceAlign vector describes alignment:
+ # 1:donorpos, 3:intron 2:acceptorpos, 0:exon, 4: dangling end
+ SpliceAlign = []
+
+ if exons[0,0] > 1:
+ SpliceAlign.extend([4]*(exons[0,0]))
+
+ #for i=1:nr_exons
+ # exon_length = exon_sizes(i); %exons(i,2) is begin of intron
+ # SpliceAlign = [SpliceAlign, zeros(1,exon_length)] ;
+ # if i~= nr_exons,
+ # intron_length = exons(i+1,1) - exons(i,2) ;
+ # SpliceAlign = [SpliceAlign, 1, ones(1,intron_length-2)*3, 2] ;
+ # end
+ #end
+
+ for idx in range(numberOfExons):
+ exonLength = exonSizes[idx]
+ SpliceAlign.extend([0]*exonLength)
+
+ if idx < numberOfExons-1:
+ intronLength = exons[idx+1,0] - exons[idx,1]
+ SpliceAlign.extend([1])
+ SpliceAlign.extend([3]*((intronLength-2)))
+ SpliceAlign.extend([2])
+
+ if len(dna) > exons[2,1]:
+ SpliceAlign.extend([4]*(len(dna)+1-exons[2,1]))
+
+ assert len(SpliceAlign) == len(dna), pdb.set_trace()
+
+ # number of matches: just have to look at the underlying est
+ # est = dna(find(SpliceAlign==0)) # exon nucleotides
+ exonPos = [pos for pos,elem in enumerate(SpliceAlign) if elem == 0]
+ est = [elem for pos,elem in enumerate(dna) if pos in exonPos]
+
+ #length_est = sum(exon_sizes) ;
+ totalESTLength = 0
+ for elem in exonSizes:
+ totalESTLength += elem
+
+ assert totalESTLength == len(est)
+
+ # counts the occurences of a,c,g,t,n in this order
+ numChar = [0]*sizeMatchmatrix
+
+ for elem in est:
+ if elem == 'a':
+ numChar[0] += 1
+ if elem == 'c':
+ numChar[1] += 1
+ if elem == 'g':
+ numChar[2] += 1
+ if elem == 't':
+ numChar[3] += 1
+ if elem == 'n':
+ numChar[4] += 1
+
+ totalNumChar = 0
+ for idx in range(sizeMatchmatrix):
+ totalNumChar += numChar[idx]
+
+ assert totalNumChar == len(est)
+
+ # writing in weight match matrix
+ # matrix is saved columnwise
+ trueWeightMatch = zeros((sizeMatchmatrix*sizeMatchmatrix,1)) # Scorematrix fuer Wahrheit
+ for idx in range(1,sizeMatchmatrix):
+ trueWeightMatch[(sizeMatchmatrix+1)*idx] = numChar[idx-1]
+
+ return SpliceAlign, trueWeightMatch
--- /dev/null
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from numpy.matlib import zeros
+
+def calculateWeights(plf, scores):
+ """
+ 3 * 30 supporting points: x_1 ... x_30
+ => 3 * 30 parameters (1 .. 90): y_1 ... y_30
+ piecewise linear function:
+
+ take score from SVM vektor (don_supp: case 1, acc_supp: case 2) and compute length of intron: case 3
+ these are our values x
+
+ | y_1 if x <= x_1
+ |
+ | x_i+1 - x x - x_i
+ f(x) = | y_i * ----------- + y_i+1 * ----------- if x_i <= x <= x_i+1
+ | x_i+1 - x_i x_i+1 - x_i
+ |
+ | y_30 if x_n <= x
+
+ y_i and y_i+1 parameters, so the fractions are saved in the weight vectors!
+ """
+ currentWeight = zeros((1,30))
+
+ for k in range(len(scores)):
+ value = scores[k]
+ Lower = sum(d.limits <= value)
+ Upper = Lower +1 ; # x-werte bleiben fest
+
+ if Lower == 0:
+ currentWeight[1] = currentWeight[1] + 1;
+ elif:
+ Lower == length[plf.limits]
+ currentWeight[30] = currentWeight[30] + 1;
+ else:
+ weightup = (value - plf.limits(Lower)) / (plf.limits(Upper) - plf.limits(Lower)) ;
+ weightlow = (plf.limits(Upper) - value) / (plf.limits(Upper) - plf.limits(Lower)) ;
+ currentWeight(Upper) = currentWeight(Upper) + weightup;
+ currentWeight(Lower) = currentWeight(Lower) + weightlow;
+
+ return currentWeight
+
+
+def computeSpliceWeights(d, a, h, SpliceAlign, don_supp, acc_supp):
+ assert(isempty(d.transform))
+ assert(isempty(a.transform))
+ assert(isempty(h.transform))
+
+ ####################################################################################
+ # 1. Donor: In don_supp stehen Werte der SVM., in SpliceAlign die Einsen
+ ####################################################################################
+
+ # Lege Gewichtsvektor fuer 30 supporting points an:
+ weightDon = zeros((1,30))
+
+ # Picke die Positionen raus, an denen eine Donorstelle ist
+ DonorScores = don_supp(find(SpliceAlign==1)) ;
+ assert(all(DonorScores~=-Inf)) ;
+
+ weightDon = calculateWeights(d,DonorScores)
+
+ ####################################################################################
+ # 2. Acceptor: In acc_supp stehen Werte der SVM., in SpliceAlign die Einsen
+ ####################################################################################
+
+ AcceptorScores = acc_supp(find(SpliceAlign == 2)+1) ;
+ assert(all(AcceptorScores~=-Inf)) ;
+
+ #Den Vektor Acceptorstellen durchgehen und die Gewichtsvektoren belasten:
+ weightAcc = calculateWeights(a,AcceptorScores)
+
+
+ ####################################################################################
+ # 3. Intron length: SpliceAlign: Gaps zaehlen und auf Gapgewichte addieren
+ ####################################################################################
+
+ intron_starts = find(SpliceAlign==1) ;
+ intron_ends = find(SpliceAlign==2) ;
+
+ %assert: introns do not overlap
+ assert(length(intron_starts) == length(intron_ends)) ;
+ assert(all(intron_starts < intron_ends)) ;
+ assert(all(intron_ends(1:end-1) < intron_starts(2:end))) ;
+
+ weightIntron = calculateWeights(h,AcceptorScores)
+
+ return weightDon, weightAcc, weightIntron
--- /dev/null
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import numpy as ny
+import numpy.matlib as nyml
+
+
+def
--- /dev/null
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import io_pickle
+import scipy.io
+import pdb
+
+def paths_load_data(expt,genome_info,PAR):
+ """
+
+ """
+
+ # function [Sequences, Acceptors, Donors, Exons, Ests, Noises] = paths_load_data(expt,genome_info,PAR)
+ # Load the relevant file and return the alignment data
+
+ # expt can be 'training','validation' or 'test'
+
+ assert expt in ['training','validation','test']
+
+ tmp_dir = '/fml/ag-raetsch/home/fabio/tmp'
+
+ Noises = [];
+
+ if expt == 'training':
+ if PAR.microexon:
+ if PAR.LOCAL_ALIGN: # local version
+
+ train_data = '%s/microexon_train_data_cut_local.mat' % genome_info.basedir
+ train_data = '%s/microexon_train_data.mat' % genome_info.basedir
+ #train_data_pickle = '%s/microexon_train_data_cut_local.pickle'% tmp_dir
+ #io_pickle.convert_v6(train_data,train_data_pickle)
+ #train_data = io_pickle.load(train_data_pickle)
+ data = scipy.io.loadmat(train_data)
+
+ else: # global version
+
+ train_data = '%s/microexon_train_data_cut_ip=%1.3f_dp=%1.3f_mp=%1.3f.mat' %\
+ (genome_info.basedir, PAR.insertion_prob, PAR.deletion_prob, PAR.mutation_prob)
+
+ train_data = '%s/microexon_train_data.mat' % genome_info.basedir
+ #train_data_pickle = '%s/microexon_train_data_cut_ip=%1.3f_dp=%1.3f_mp=%1.3f.pickle' %\
+ # (tmp_dir, PAR.insertion_prob, PAR.deletion_prob, PAR.mutation_prob)
+
+ #io_pickle.convert_v6(train_data,train_data_pickle)
+ #train_data = io_pickle.load(train_data_pickle)
+ data = scipy.io.loadmat(train_data)
+ Noises = data['TrainNoise'] # substitution matrix
+
+ else:
+ train_data = '%s/exons_train_local.mat' % genome_info.basedir
+ #train_data_pickle = '%s/exons_train_local.pickle'% tmp_dir
+ #io_pickle.convert_v6(train_data,train_data_pickle)
+ #microexon_train_data = io_pickle.load(train_data_pickle)
+ data = scipy.io.loadmat(train_data)
+
+ print 'train_data is %s' % train_data
+
+ Sequences = data['Train'] # dna sequences
+ Acceptors = data['TrainAcc'] # acceptor scores
+ Donors = data['TrainDon'] # donor scores
+ Exons = data['TrainExon'] # exon boundaries
+ Ests = data['TrainEsts'] # est sequences
+
+ #elif expt == 'validation':
+ # print('Loading validation data\n') ;
+ # if PAR.microexon,
+ # if PAR.LOCAL_ALIGN
+ # %local version
+ # load(sprintf('%s/microexon_val_data_cut_local_ip=%1.3f_dp=%1.3f_mp=%1.3f.mat', ...
+ # genome_info.basedir, PAR.insertion_prob, PAR.deletion_prob, PAR.mutation_prob), ...
+ # 'ValEsts', 'ValExon', 'Val', 'ValAcc', 'ValDon') ;
+ # else
+ # %global version
+ # load(sprintf('%s/microexon_val_data_cut_ip=%1.3f_dp=%1.3f_mp=%1.3f.mat', ...
+ # genome_info.basedir, PAR.insertion_prob, PAR.deletion_prob, PAR.mutation_prob), ...
+ # 'ValEsts', 'ValExon', 'Val', 'ValAcc', 'ValDon') ;
+ # end
+ # else
+ # load(sprintf('%s/exons_val_ip=%1.3f_dp=%1.3f_mp=%1.3f.mat', ...
+ # genome_info.basedir, PAR.insertion_prob, PAR.deletion_prob, PAR.mutation_prob), ...
+ # 'ValEsts', 'ValExon', 'Val', 'ValAcc', 'ValDon') ;
+ # end
+ #
+ # Sequences = Val ; % dna sequences
+ # Acceptors = ValAcc ; % acceptor scores
+ # Donors = ValDon ; % donor scores
+ # Exons = ValExon ; % exon boundaries
+ # Ests = ValEsts ; % est sequences
+
+
+
+ #elif expt == 'test':
+ # fprintf('Loading test data\n') ;
+ # if PAR.microexon,
+ # if PAR.LOCAL_ALIGN
+ # %local version
+ # load(sprintf('%s/microexon_test_data_cut_local_ip=%1.3f_dp=%1.3f_mp=%1.3f.mat', ...
+ # genome_info.basedir, PAR.insertion_prob, PAR.deletion_prob, PAR.mutation_prob), ...
+ # 'TestEsts', 'TestExon', 'Test', 'TestAcc', 'TestDon') ;
+ # else
+ # %global version
+ # load(sprintf('%s/microexon_test_data_cut_ip=%1.3f_dp=%1.3f_mp=%1.3f.mat', ...
+ # genome_info.basedir, PAR.insertion_prob, PAR.deletion_prob, PAR.mutation_prob), ...
+ # 'TestEsts', 'TestExon', 'Test','TestAcc', 'TestDon', 'TestNoise') ;
+ # Noises = TestNoise ; % substitution matrix
+ # end
+ # else
+ # load(sprintf('%s/exons_test_ip=%1.3f_dp=%1.3f_mp=%1.3f.mat', ...
+ # genome_info.basedir, PAR.insertion_prob, PAR.deletion_prob, PAR.mutation_prob), ...
+ # 'TestEsts', 'TestExon', 'Test', 'TestAcc', 'TestDon') ;
+ # end
+ #
+ # Sequences = Test ; % dna sequences
+ # Acceptors = TestAcc ; % acceptor scores
+ # Donors = TestDon ; % donor scores
+ # Exons = TestExon ; % exon boundaries
+ # Ests = TestEsts ; % est sequences
+
+ return Sequences, Acceptors, Donors, Exons, Ests, Noises
--- /dev/null
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+import subprocess
+import scipy.io
+from paths_load_data import *
+import pdb
+import numpy.matlib
+from set_param_palma import *
+from computeSpliceAlign import *
+
+"""
+A training method for the QPalma project
+
+Overall :
+
+ 1. Load the data
+ via paths_load_data
+
+ 2. Create a QP using SIQP (paths_create_qp)
+
+ 3. Set initial params using set_param_palma -> palma_utils
+
+ 4. computeSpliceAlign
+
+ 5. compute_SpliceAlignLocal
+
+ 6. computeSpliceWeights
+
+ 7. myalign_local
+
+"""
+
+class Param:
+
+ def __init__(self):
+ """ default parameters """
+
+ self.basedir = '/fml/ag-raetsch/share/projects/qpalma/elegans_palma'
+ self.MAX_MEM = 31000;
+ self.LOCAL_ALIGN = 1;
+ self.init_param = 1;
+ self.train_with_splicesitescoreinformation = 1;
+ self.train_with_intronlengthinformation = 1;
+ self.C = 0.001;
+ self.microexon = 0;
+ self.prob = 0;
+ self.organism = 'elegans'
+ self.expt = 'training'
+
+ self.insertion_prob = self.prob/3 ;
+ self.deletion_prob = self.prob/3 ;
+ self.mutation_prob = self.prob/3 ;
+
+
+#%basedir='/fml/ag-raetsch/share/projects/splicing/uta/'
+#if ~exist('basedir','var')
+# basedir='/fml/ag-raetsch/share/projects/palma/elegans/' ;
+#end
+#
+#%basedir='/fml/ag-raetsch/share/projects/palma/human/' ;
+
+
+class GenomeInfo:
+ """
+ GenomeInfo's init function replaces
+ init_genome
+
+ """
+
+ def __init__(self,gen_file):
+ """ load the info stored in genome.config """
+
+ self.est_fnames = g_est_fnames
+ self.cdna_fnames = g_cdna_fnames
+ self.flcdna_fnames=g_flcdna_fnames
+ self.contig_names=g_contig_names
+ self.flat_fnames=g_flat_fnames
+ self.fasta_fnames=g_fasta_fnames
+ self.annotation_fnames=g_annotation_fnames
+ self.alphabet=g_alphabet
+ self.basedir=g_basedir
+ self.max_intron_len = g_max_intron_len
+ self.min_intron_len = g_min_intron_len
+ self.merge_est_transcripts = g_merge_est_transcripts
+
+
+def run():
+ ARGS = Param()
+
+ gen_file= '%s/genome.config' % ARGS.basedir
+
+ cmd = ['']*4
+ cmd[0] = 'addpath /fml/ag-raetsch/home/fabio/svn/tools/utils'
+ cmd[1] = 'addpath /fml/ag-raetsch/home/fabio/svn/tools/genomes'
+ cmd[2] = 'genome_info = init_genome(\'%s\')' % gen_file
+ cmd[3] = 'save genome_info.mat genome_info'
+ full_cmd = "matlab -nojvm -nodisplay -r \"%s; %s; %s; %s; exit\"" % (cmd[0],cmd[1],cmd[2],cmd[3])
+
+ obj = subprocess.Popen(full_cmd,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
+ out,err = obj.communicate()
+ assert err == '', 'An error occured!\n%s'%err
+
+ ginfo = scipy.io.loadmat('genome_info.mat')
+ genome_info = ginfo['genome_info']
+
+ print 'genome_info.basedir is %s\n'%genome_info.basedir
+
+ Sequences, Acceptors, Donors, Exons, Ests, Noises = paths_load_data('training',genome_info,ARGS)
+
+ #############################################################################################
+ # Constants
+ #############################################################################################
+
+ # number of training instances
+ N = len(Sequences)
+ print 'Number of training examples: %d\n'% N
+
+ random_N = 100 ; # number of constraints to generate per iteration
+ iteration_steps = 200 ; #upper bound on iteration steps
+
+ # myalign
+ remove_duplicate_scores = 0
+ anzpath = 2
+ print_matrix = 0
+
+ param = numpy.matlib.rand(126,1)
+
+ [h,d,a,mmatrix] = set_param_palma(param,ARGS.train_with_intronlengthinformation)
+
+ #import palma.model
+ #currentModel = palma.model.parse_file(modelFile)
+
+ #import palma_utils
+ #h,d,a,mmatrix = palma_utils.set_param(currentModel)
+
+ #############################################################################################
+ # delete splicesite-score-information
+ #############################################################################################
+
+ #if ARGS.train_with_splicesitescoreinformation:
+ # for i in range(len(Acceptors)):
+ # Acceptors[i](Acceptors[{i]>-20)=1 ;
+ # Donors{i}(Donors{i}>-20)=1 ;
+ # end
+ #end
+
+ #############################################################################################
+ # Training
+ #############################################################################################
+ print 'Starting training...'
+
+ #The alignment matrix M looks like
+
+ # DNA
+
+ # A C G T -
+ # ------------
+ # A
+ #
+ #E C
+ #S
+ #T G
+
+ # T
+
+ # -
+
+
+ #where
+
+ # - is a gap,
+
+
+ numDonorSupportPoints = 30
+ numAcceptorSupportPoints = 30
+ numIntronLengthSupportPoints = 30
+ numQualityScoreSupportPoints = 0
+
+ numFeatures = numDonorSupportPoints\
+ + numAcceptorSupportPoints\
+ + numIntronLengthSupportPoints\
+ + numQualityScoreSupportPoints
+
+ numExamples = N
+
+ from SIQP_CPX import SIQPSolver
+ C=1.0
+
+ logfile = open('qpalma.log','w+')
+ #problem = SIQPSolver(numFeatures,numExamples,C,logfile)
+
+ #xis = zeros(1,N) ; #ninitialize slack variables
+ #num_path = anzpath*ones(1,N) ; # nr of alignments done (best path, second-best path etc.)
+ #gap = zeros(1,N) ; %? sum of differences between true alignment score and 'false' alignment scores
+
+
+ #for step_nr=1:iteration_steps,
+ iteration_nr = 1
+
+ while True:
+ print 'Iteration step: %d'% iteration_nr
+
+ for exampleId in range(numExamples):
+ if exampleId % 1000 == 0:
+ print 'Current example nr %d' % exampleId
+ dna = Sequences[exampleId]
+ est = Ests[exampleId]
+
+ exons = Exons[exampleId]
+ # NoiseMatrix = Noises[exampleId]
+ don_supp = Donors[exampleId]
+ acc_supp = Acceptors[exampleId]
+
+
+ # Berechne die Parameter des wirklichen Alignments (but with untrained d,a,h ...)
+ true_SpliceAlign, true_weightMatch = computeSpliceAlign(dna, exons)
+
+ pdb.set_trace()
+
+ true_weightDon, true_weightAcc, true_weightIntron = computeSpliceWeights(d, a, h, true_SpliceAlign, don_supp, acc_supp)
+
+
+ iteration_nr += 1
+ break
+
+ """
+ for id = 1:N
+ %fprintf('%i\n', id) ;
+ if (mod(id,50)==0), fprintf(1,'.'); end
+
+
+ %some assertions concerning the splicesites
+ idx_don = find(don_supp~=-Inf) ;
+ assert(all(dna(idx_don)=='g')) ;
+ assert(all(dna(idx_don(idx_don<length(dna))+1)=='t' | dna(idx_don(idx_don<length(dna))+1)=='c')) ;
+ idx_acc = find(acc_supp~=-Inf) ;
+ assert(all(dna(idx_acc(idx_acc>3)-2)=='a')) ;
+ assert(all(dna(idx_acc(idx_acc>2)-1)=='g')) ;
+
+ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ % True Alignment and Comparison with wrong ones
+ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+ %Berechne die Parameter des wirklichen Alignments (but with untrained d,a,h ...)
+ [true_SpliceAlign, true_weightMatch] = compute_SpliceAlign_local(dna, exons) ;
+ [true_weightDon, true_weightAcc, true_weightIntron] = ...
+ compute_SpliceWeights(d, a, h, true_SpliceAlign, don_supp, acc_supp) ;
+
+ double(true_weightMatch) ;
+
+ % Berechne Gewichtsmatrix fuer aktuelle id (matrix anlegen)
+ Weights = zeros(num_path(id)+1, 126) ; %first line: true alignment, rest: wrong alignments
+ Weights(1,:) = [true_weightIntron, true_weightDon, true_weightAcc, true_weightMatch] ;
+
+ %Score of whole alignment
+ AlignmentScores = zeros(1, num_path(id)+1) ; %first entry: true alignment
+ AlignmentScores(1) = Weights(1,:) * [h.penalties' ; d.penalties' ; a.penalties' ; mmatrix(:)] ;
+
+ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ % Wrong Alignments
+ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ % Compute donor, acceptor with penalty_lookup_new
+ [donor, acceptor] = compute_donacc(don_supp, acc_supp, d, a) ;
+
+ %myalign wants the acceptor site on the g of the ag
+ acceptor = [acceptor(2:end) -Inf] ;
+
+ %call myalign
+ [SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest] = ...
+ myalign_local(num_path(id), dna, est, {h}, mmatrix, donor, acceptor, ...
+ remove_duplicate_scores, print_matrix);
+
+ %[SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest] = myalign_local(1, dna, est, {h}, mmatrix, donor,
+ %acceptor, remove_duplicate_scores, print_matrix);
+
+ %return values (are int32, have to be double later cause we compute scores
+ SpliceAlign = double(SpliceAlign') ; %column
+ weightMatch = double(weightMatch') ;
+
+
+ %test
+ for pfadNo = 1:num_path(id)
+ assert(sum(weightMatch(pfadNo,7:end)) == sum(SpliceAlign(pfadNo,:)==0)) ;
+ new_weightMatch = zeros(1,36) ;
+ for iii = 1:length(dnaest{1,pfadNo})
+ if dnaest{2,pfadNo}(iii) ~= 6
+ new_weightMatch(dnaest{1,pfadNo}(iii)*6 + dnaest{2,pfadNo}(iii) + 1) = new_weightMatch(dnaest{1,pfadNo}(iii)*6 + dnaest{2,pfadNo}(iii)+1) + 1 ;
+ end
+ end
+ assert(all(new_weightMatch == weightMatch(pfadNo,:))) ;
+ assert(sum(new_weightMatch(7:end)) == sum(SpliceAlign(pfadNo,:)==0)) ;
+ %reshape(new_weightMatch,6,6)
+ %reshape(weightMatch(pfadNo,:),6,6)
+ end
+
+ %assert that it is the right file
+ assert(all(dna(find(SpliceAlign(1,:)==1)) == 'g')) ;
+
+ %just some info
+ %print_align(dnaest,1) ;
+ %exons(3,2)-exons(1,1)
+ %length(dnaest{1,1})
+ %keyboard
+
+ %Berechne Gewichte der durch Alignment berechneten Pfade
+ true_map = zeros(1,1+num_path(id)) ;
+ true_map(1) = 1 ;
+ path_loss=[] ;
+ path_loss(1) = 0 ;
+ for pfadNo = 1:num_path(id)
+ dna_numbers = dnaest{1,pfadNo} ;
+ est_numbers = dnaest{2,pfadNo} ;
+
+ [weightDon, weightAcc, weightIntron] = ...
+ compute_SpliceWeights(d, a, h, SpliceAlign(pfadNo,:), don_supp, acc_supp) ;
+
+ path_loss(pfadNo+1) = sum(double(SpliceAlign(pfadNo,:))~=true_SpliceAlign) ; %not too simple?
+
+ %Gewichte in restliche Zeilen der Matrix speichern
+ Weights(pfadNo+1,:) = [weightIntron, weightDon, weightAcc, weightMatch(pfadNo,:)] ;
+
+ AlignmentScores(pfadNo+1) = Weights(pfadNo+1,:) * [h.penalties' ; d.penalties' ; a.penalties' ; mmatrix(:)] ;
+
+ %Test, ob Alignprogr. auch das richtige Ergebnis liefert:
+ assert(abs(Gesamtscores(pfadNo) - AlignmentScores(pfadNo+1)) < 1e-6) ;
+
+ if norm(Weights(1,:)-Weights(pfadNo+1,:))<1e-5,
+ true_map(pfadNo+1)=1 ;
+ end
+ end
+
+ % the true label sequence should not have a larger score than the
+ % maximal one WHYYYYY?
+ if AlignmentScores(1) >= max(AlignmentScores(2:end))+1e-6,
+ AlignmentScores
+ warning('true score > max score\n') ;
+ keyboard
+ end ;
+
+ %%%set_param_palma should have done this right
+ for z = 1:num_path(id)
+ assert(abs(Weights(z,:) * param(1:126)' -AlignmentScores(z)) <= 1e-6) ; %abs: absolute value
+ end
+
+ if all(true_map==1)
+ num_path(id)=num_path(id)+1 ; %next iteration step: one alignment more
+ end ;
+
+ %Choose true and first false alignment for extending A
+ Weights = Weights([1 min(find(true_map==0))], :) ;
+
+ %if there is a false alignment
+ if size(Weights,1)>1 & sum((Weights(1,:)-Weights(2,:)).*param)+xis(id)<1+column_eps,
+ e=zeros(1,N) ;
+ e(id) = 1 ;
+ A=[A;
+ Weights(2,:)-Weights(1,:) zeros(1,126) -e] ;
+ b=[b;
+ -1] ;
+ gap(id) = 1-sum((Weights(1,:)-Weights(2,:)).*param)-xis(id) ;
+ else
+ gap(id) = 0 ;
+ end ;
+ end
+ fprintf('\n') ;
+
+
+
+ const_added = solver.addConstraint(deltas, loss, pos, marginRescaling)
+
+ objValue,w,self.slacks = solver.solve()
+
+ # [param,xis] = paths_qp_solve(lpenv,Q,f,A,b,LB,UB);
+ # sum_xis = sum(xis)
+ # maxgap=max(gap)
+ [mean(xis>=1) mean(gap>=1) mean(xis>=1e-3) mean(gap>=1e-3)]
+
+ [h,d,a,mmatrix] = set_param_palma(param,ARGS.train_with_intronlengthinformation) ;
+
+ save(paramfilename, ...
+ 'param', 'h', 'd', 'a', 'mmatrix', 'A', 'b') ;
+
+ if max(gap)<=column_eps:
+ break
+
+
+ assert(all(maxgap<=column_eps))
+ assert(length(take_idx)==N)
+ """
+ print 'Training completed'
+
+if __name__ == '__main__':
+ run()
+
+ """
+
+ def __init__(self):
+ numAcceptorParams = 0
+ numDonorParams = 0
+ numIntronLengthParams = 30
+
+ # how many supporting points do we have for the
+ # quality function per base
+ numQualityParamsPerBase = 5
+
+ # we have quality params for each possible
+ # for all possible combinations of
+ # {A,C,G,T} x {A,C,G,T,-}
+ # 4 x 5
+ numQualityParams = numQualityParamsPerBase * 4 * 5
+
+ # for all possible combinations of
+ # {A,C,G,T} x {A,C,G,T,-,*} plus
+ # {-} x {A,C,G,T,-}
+ # 1 x 5
+ numParams = numQualityParams + 1*5
+ """
--- /dev/null
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import math
+import numpy.matlib
+
+def linspace(a,b,n):
+ intervalLength = b-a
+ stepSize = intervalLength / n
+
+ interval = [0]*n
+ for i in range(n):
+ interval[i] = a+i*stepSize
+
+ return interval
+
+
+def logspace(a,b,n):
+ interval = [0]*n
+ intervalSize = b-a
+
+ interval[n-1] = b
+
+ for i in range(n-2,0,-1):
+ interval[i] = interval[i+1] / math.e
+
+ return interval
+
+class Plf: #means piecewise linear function
+ len = 0
+ limits = []
+ penalties = []
+ transform = ''
+ name = ''
+ max_len = 0
+ min_len = 0
+
+def set_params_pa():
+ h = plf()
+ h.len = int(model.intron_len_bins)
+ h.limits = model.intron_len_limits
+ h.penalties = model.intron_len_penalties
+ h.name = 'h'
+ h.max_len = int(max_intron_len)
+ h.min_len = int(min_intron_len)
+ h.transform = model.intron_len_transform
+
+ d = plf()
+ d.len = int(model.donor_bins)
+ d.limits = model.donor_limits
+ d.penalties = model.donor_penalties
+ d.name = 'd'
+ d.max_len = 100
+ d.min_len = -100
+
+ a = plf()
+ a.len = int(model.acceptor_bins)
+ a.limits = model.acceptor_limits
+ a.penalties = model.acceptor_penalties
+ a.name = 'a'
+ a.max_len = 100
+ a.min_len = -100
+
+ mmatrix = model.substitution_matrix
+
+
+def set_param_palma(param, train_with_intronlengthinformation,\
+ min_intron_len=None, max_intron_len=None, min_svm_score=None, max_svm_score=None):
+
+ print 'Setting parameters ...'
+
+ if min_intron_len == None:
+ if train_with_intronlengthinformation:
+ min_intron_len=20
+ max_intron_len=1000
+ else:
+ min_intron_len = 1
+ max_intron_len = 2
+
+ if min_intron_len != None and max_intron_len != None:
+ min_svm_score=-5
+ max_svm_score=5
+
+
+ h = Plf()
+ d = Plf()
+ a = Plf()
+
+ ####################
+ # Gapfunktion
+ ####################
+ h.limits = logspace(math.log(min_intron_len,10),math.log(max_intron_len,10),30)
+ h.penalties = param[1:30]
+ #h.transform = '+1'
+ h.transform = ''
+ h.name = 'h'
+ h.max_len = 100000
+ h.min_len = 4
+ h.id = 1
+ h.use_svm = 0
+ h.next_id = 0
+
+
+ ####################
+ # Donorfunktion
+ ####################
+ d.limits = linspace(min_svm_score,max_svm_score,30)
+ d.penalties = param[31:60]
+ #d.transform = '+1'
+ d.transform = ''
+ d.name = 'd'
+ d.max_len = 100
+ d.min_len = -100
+ d.id = 1
+ d.use_svm = 0
+ d.next_id = 0
+
+
+ ####################
+ # Acceptorfunktion
+ ####################
+ a.limits = linspace(min_svm_score,max_svm_score,30)
+ a.penalties = param[61:90]
+ #a.transform = '+1'
+ a.transform = ''
+ a.name = 'a'
+ a.max_len = 100
+ a.min_len = -100
+ a.id = 1
+ a.use_svm = 0
+ a.next_id = 0
+
+
+ ####################
+ # Matchmatrix
+ ####################
+ mmatrix = numpy.matlib.mat(param[90:126])
+ mmatrix.reshape(6,6)
+
+ return h,d,a,mmatrix