--- /dev/null
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import numpy.matlib
+
+fixedParam = numpy.matlib.mat([[ 0.62870709], [ 0.7012026 ], [ 0.60236784],
+ [ 0.51316259], [ 0.20220814], [ 0.70324863], [ 0.37218684], [ 0.82178927],
+ [ 0.60394866], [ 0.70371272], [ 0.07548074], [ 0.63412803], [ 0.97442266],
+ [ 0.13216791], [ 0.71041168], [ 0.2093887 ], [ 0.35227344], [ 0.3405142 ],
+ [ 0.69677236], [ 0.41673747], [ 0.564245 ], [ 0.37613432], [ 0.88805642],
+ [ 0.88691608], [ 0.69476752], [ 0.81659504], [ 0.17801859], [ 0.71048235],
+ [ 0.08188783], [ 0.54884803], [ 0.84039558], [ 0.6982093 ], [ 0.41686176],
+ [ 0.38568873], [ 0.29401347], [ 0.12704074], [ 0.30640858], [ 0.89578031],
+ [ 0.84621571], [ 0.11783439], [ 0.0944695 ], [ 0.34081575], [ 0.44157643],
+ [ 0.77847185], [ 0.04283567], [ 0.45107823], [ 0.89789891], [ 0.41045519],
+ [ 0.49073531], [ 0.29727627], [ 0.94711483], [ 0.24898204], [ 0.26181212],
+ [ 0.71760957], [ 0.60326883], [ 0.80887576], [ 0.09448718], [ 0.88064525],
+ [ 0.84317654], [ 0.48893703], [ 0.24847021], [ 0.84203596], [ 0.34104156],
+ [ 0.75604701], [ 0.91703057], [ 0.69325475], [ 0.61276969], [ 0.16335226],
+ [ 0.4684374 ], [ 0.16553371], [ 0.79594434], [ 0.6440283 ], [ 0.80922237],
+ [ 0.5349296 ], [ 0.31924316], [ 0.10960695], [ 0.40151062], [ 0.50473641],
+ [ 0.14812671], [ 0.73523169], [ 0.35141625], [ 0.80364238], [ 0.02128181],
+ [ 0.0061226 ], [ 0.34541924], [ 0.07694485], [ 0.05551339], [ 0.23087636],
+ [ 0.87016395], [ 0.31682377], [ 0.27375113], [ 0.72226332], [ 0.62914149],
+ [ 0.59236012], [ 0.2070238 ], [ 0.52390942], [ 0.11894098], [ 0.55725917],
+ [ 0.72706009], [ 0.087196 ], [ 0.04745082], [ 0.95636492], [ 0.31524576],
+ [ 0.79685218], [ 0.80386771], [ 0.70942604], [ 0.82869417], [ 0.26906569],
+ [ 0.51848039], [ 0.64169354], [ 0.07114973], [ 0.39249454], [ 0.07002803],
+ [ 0.94667567], [ 0.02252752], [ 0.01039039], [ 0.5721312 ], [ 0.06065969],
+ [ 0.69422476], [ 0.4310939 ], [ 0.03069099], [ 0.35969779], [ 0.18047331],
+ [ 0.4177651 ], [ 0.01360547], [ 0.29069319]])
+
self.dimP = self.numFeatures + self.numExamples
self.createRegularizer()
- def parseParameterFile(self):
- paramFile = 'Parameters.h'
- paramFileLoc = os.path.join('/fml/ag-raetsch/home/fabio/projects/rfp_current',paramFile)
-
- self.featMap = {
- 'BASE_PAIR':0,
- 'HELIX_CLOSING':1,
- 'STACKING_PAIR':2,
- 'STACKING_SCORE':3,
- 'TERM_MIS':4,
- 'HAIRPIN':5,
- 'BULGE':6,
- 'INTERNAL_LENGTH':7,
- 'INTERNAL_FULL':8,
- 'INTERNAL_ASYM':9,
- 'SINGLE_LEFT':10,
- 'SINGLE_RIGHT':11 }
-
- # ('STACKING_PAIR_AAAA','STACKING_PAIR_UUUU')
- self.start_stops = [
- ('BASE_PAIR_AA','BASE_PAIR_UU'),
- ('HELIX_CLOSING_AA','HELIX_CLOSING_UU'),
- ('STACKING_PAIR_AAAA','STACKING_PAIR_UUUU'),
- ('STACKING_1_SCORE','STACKING_5_SCORE'),
- ('TERMINAL_MISMATCH_AAAA','TERMINAL_MISMATCH_UUUU'),
- ('HAIRPIN_LOOP_1','HAIRPIN_LOOP_30'),
- ('BULGE_LOOP_1','BULGE_LOOP_30'),('INTERIOR_LOOP_1','INTERIOR_LOOP_30'),
- ('INTERNAL_LENGTH_1','INTERNAL_LENGTH_30'),
- ('INTERNAL_FULL_1_1','INTERNAL_FULL_15_15'),
- ('INTERNAL_ASYMMETRY_1','INTERNAL_ASYMMETRY_30'),
- ('SINGLE_BASE_PAIR_STACKING_LEFT_AAA','SINGLE_BASE_PAIR_STACKING_LEFT_UUU'),
- ('SINGLE_BASE_PAIR_STACKING_RIGHT_AAA','SINGLE_BASE_PAIR_STACKING_RIGHT_UUU') ]
-
- self.param_pos = [(0,0)]*len(self.featMap)
-
- for line in open(paramFileLoc):
- if line.startswith('#define') and not line.startswith('#define _'):
- line = line.replace('#define','').strip()
- name,value = line.split(' ')
- value = int(value)
-
- for key,id in self.featMap.items():
- if name == self.start_stops[id][0]:
- self.param_pos[id] = (value,0)
- if name == self.start_stops[id][1]:
- a,b = self.param_pos[id]
- self.param_pos[id] = (a,value)
-
- self.param_ranges = [(0,0)]*len(self.featMap)
- self.param_coeff = [1.0]*len(self.featMap)
-
- self.param_coeff = [
- 0.33, 2.5, 1.0, 1.0,
- 1.5, 0.5, 5.0, 3.0,
- 2.5, 5.0, 2.5, 2.0]
-
- for key,id in self.featMap.items():
- self.param_ranges[id] = range(self.param_pos[id][0],self.param_pos[id][1])
-
- def regularizeTogether(self,pos1,pos2):
- self.P[pos1,pos2] = -1.0
- self.P[pos2,pos1] = -1.0
- #self.P[pos1,pos1] += 1.0
-
- def createRNARegularizer(self):
- self.parseParameterFile()
-
- self.loopRanges = [ self.param_ranges[self.featMap['STACKING_SCORE']], self.param_ranges[self.featMap['HAIRPIN']],
- self.param_ranges[self.featMap['BULGE']],self.param_ranges[self.featMap['INTERNAL_LENGTH']]]
-
- self.P = cb.matrix(0.0,(self.dimP,self.dimP))
-
- # first create unit matrix (only upper block)
- for i in range(self.numFeatures):
- self.P[i,i] = 1.0
-
- # now add terms for loop smoothness constraints
- #for currentRange in self.loopRanges:
- # for i in currentRange:
- # self.P[i,i+1] = -1.0
- # self.P[i+1,i] = -1.0
- # self.P[i,i] += 1.0
-
- b0,b1,b2,b3 = 1,4,16,64
- posList = []
-
- for i in range(4):
- for j in range(4):
- for k in range(4):
- for l in range(4):
-
- index1 = i*b3 + j*b2 + k*b1 + l*b0
- index2 = l*b3 + k*b2 + j*b1 + i*b0
-
- if index1 > index2:
- continue;
-
- posList.append((i,j,k,l))
-
- stacking_start = (self.param_pos[self.featMap['STACKING_PAIR']])[0]
-
- for pos1,elem1 in enumerate(posList):
- for pos2,elem2 in enumerate(posList):
- if elem1 == elem2:
- continue
-
- i,j,k,l = elem1
- ip,jp,kp,lp = elem2
-
- if (k,l,i,j) == (ip,jp,kp,lp):
- print stacking_start+pos1,stacking_start+pos2
- #self.regularizeTogether(stacking_start+pos1,stacking_start+pos2)
-
- # Scale regularization terms
- for key,id in self.featMap.items():
- currentRange = self.param_ranges[id]
- currentCoeff = self.param_coeff[id]
-
- self.P[currentRange,currentRange] *= currentCoeff
-
- for i in range(self.numFeatures,self.dimP):
- assert self.P[i,i] == 0.0
-
def createUnitRegularizer(self):
# set regularizer to zero
self.P = cb.matrix(0.0,(self.dimP,self.dimP))
def createRegularizer(self):
self.createUnitRegularizer()
- #self.createRNARegularizer()
q_array = [0.0]*self.numFeatures + [1.0]*self.numExamples
self.q = cb.matrix(q_array,(self.numFeatures+self.numExamples,1),'d')
self.qmatbeg, self.qmatcnt, self.qmatind, self.qmatval = self.cpx_matrix(self.P)
CPX.copyquad(self.env, self.lp, self.qmatbeg, self.qmatcnt, self.qmatind, self.qmatval)
- def addConstraint(self, energy_deltas, loss, example_idx, useMarginRescaling):
+ def addConstraint(self, energy_deltas, example_idx):
+ #def addConstraint(self, energy_deltas, loss, example_idx, useMarginRescaling):
"""
This method adds one contraint to the optimization problem.
"""
self.lastDelta = energy_deltas
- self.lastLoss = loss
self.lastIndex = example_idx
- assert loss >= 0.0, 'Loss has to be non-negative!'
assert -1 < example_idx and example_idx < self.numExamples
- #if loss < self.EPSILON:
- # return False
-
- #if self.old_w != None:
- # scalar_prod = self.old_w[0:self.numFeatures,0].trans() * energy_deltas
- # old_slack = self.old_w[self.numFeatures+example_idx,0]
- # if useMarginRescaling:
- # if scalar_prod[0] + old_slack > loss: # leave some room for inaccuracies
- # print 'Current example already fulfills constraint.'
- # print >> self.protocol, 'Current example already fulfills constraint.'
- # return False
-
- # printing out respective loss resp. scalar prod contributions
if self.old_w != None:
- print >> self.protocol, 'Loss contrib is %d'%loss
print >> self.protocol, 'Scalar prod. contrib is %f'%((self.old_w[0:self.numFeatures].T*energy_deltas)[0])
numNewRows = 1
numNewCols = 0
- row_rhs = FloatMatrix([loss]).data
+ row_rhs = FloatMatrix([1.0]).data
row_sense = CharMatrix(['G']).data
row_matbeg = IntMatrix([0]).data
sumOfSlacks += new_w[self.numFeatures+i,0]
scalarProdDiff = (new_w[0:self.numFeatures,0].T * self.lastDelta)[0]
- print >> self.protocol, 'Constraint: %f >= %d - %f'%(scalarProdDiff,self.lastLoss,new_w[self.numFeatures+self.lastIndex])
+ # print >> self.protocol, 'Constraint: %f >= %d - %f'%(scalarProdDiff,self.lastLoss,new_w[self.numFeatures+self.lastIndex])
# evalute parts of the objective
regularizationPart = (0.5 *new_w.T*self.P*new_w)[0]
currentWeight[Upper] = currentWeight[Upper] + weightup
currentWeight[Lower] = currentWeight[Lower] + weightlow
- print value,plf.limits[Lower],plf.limits[Upper]
- print weightup,weightlow,currentWeight[Upper],currentWeight[Lower]
+ #print value,plf.limits[Lower],plf.limits[Upper]
+ #print weightup,weightlow,currentWeight[Upper],currentWeight[Lower]
return currentWeight
import pdb
from numpy.matlib import mat,zeros,ones,inf
+from numpy.linalg import norm
import QPalmaDP
Donors[i] = 1
# Initialize solver
- #problem = SIQPSolver(numFeatures,numExamples,C,logfile)
+ solver = SIQPSolver(self.numFeatures,self.numExamples,self.C,self.logfh)
# stores the number of alignments done for each example (best path, second-best path etc.)
num_path = [anzpath]*N
# Calculate the weights
trueWeightDon, trueWeightAcc, trueWeightIntron = computeSpliceWeights(d, a, h, trueSpliceAlign, don_supp, acc_supp)
- # Reshape currentW param
- currentW = self.createParameterVector(trueWeightDon, trueWeightAcc, trueWeightIntron, trueWeightMatch, qualityMatrix)
+ #trueWeight = self.createParameterVector(trueWeightDon, trueWeightAcc, trueWeightIntron, trueWeightMatch, qualityMatrix)
+ trueWeight = numpy.vstack([trueWeightIntron, trueWeightDon, trueWeightAcc, trueWeightMatch, qualityMatrix ])
#currentPhi = createFeatureVector()
currentPhi[126:] = qualityMatrix[:]
# Calculate w'phi(x,y) the total score of the alignment
- trueAlignmentScore = currentW.T * currentPhi
+ trueAlignmentScore = (trueWeight.T * currentPhi)[0,0]
+ # The allWeights vector is supposed to store the weight parameter
+ # of the true alignment as well as the weight parameters of the
+ # num_path[exampleIdx] other alignments
+ allWeights = zeros((self.numFeatures,num_path[exampleIdx]+1))
+ allWeights[:,0] = trueWeight[:,0]
- ################## Calculate wrong alignment(s) ######################
+ AlignmentScores = [0.0]*(num_path[exampleIdx]+1)
+ AlignmentScores[0] = trueAlignmentScore
+ ################## Calculate wrong alignment(s) ######################
# Compute donor, acceptor with penalty_lookup_new
# returns two double lists
for i in range(mm_len*num_path[exampleIdx]):
weightMatch[i] = newWeightMatch[i]
+
+ for i in range(num_path[exampleIdx]):
+ AlignmentScores[i+1] = newAlignmentScores[i]
spliceAlign = spliceAlign.reshape(num_path[exampleIdx],dna_len)
weightMatch = weightMatch.reshape(num_path[exampleIdx],mm_len)
+
+ #pdb.set_trace()
+ #print spliceAlign[:,:]
+ #print weightMatch[:,:]
+ #print AlignmentScores
# Calculate weights of the respective alignments Note that we are
# calculating n-best alignments without any hamming loss, so we
# constraints. To keep track of the true and false alignments we
# define an array true_map with a boolean indicating the
# equivalence to the true alignment for each decoded alignment.
- true_map = zeros((1,1+num_path[exampleIdx]))
+ true_map = [0]*(num_path[exampleIdx]+1)
true_map[0] = 1
- path_loss = [0]*num_path[exampleIdx]
-
- AlignmentScores = [0.0]*num_path[exampleIdx]
- Weights = zeros((num_path[exampleIdx],self.numFeatures))
+ path_loss = [0]*(num_path[exampleIdx]+1)
for pathNr in range(num_path[exampleIdx]):
#dna_numbers = dnaest{1,pathNr}
# sum up positionwise loss between alignments
for alignPosIdx in range(len(spliceAlign[pathNr,:])):
if spliceAlign[pathNr,alignPosIdx] != trueSpliceAlign[alignPosIdx]:
- path_loss[pathNr] += 1
+ path_loss[pathNr+1] += 1
# Gewichte in restliche Zeilen der Matrix speichern
# Weights[pathNr,:] = [weightIntron, weightDon, weightAcc, weightMatch(pathNr,:)]
- wp = self.createParameterVector(weightIntron, weightDon, weightAcc, weightMatch[pathNr,:], qualityMatrix)
- Weights[pathNr,:] = wp.T
+ wp = numpy.vstack([weightIntron, weightDon, weightAcc, weightMatch[pathNr,:].T, qualityMatrix ])
+ allWeights[:,pathNr+1] = wp
+
+ #print allWeights[:,0]
+ #print allWeights[:,1]
+ #print allWeights[:,2]
hpen = mat(h.penalties).reshape(len(h.penalties),1)
dpen = mat(d.penalties).reshape(len(d.penalties),1)
apen = mat(a.penalties).reshape(len(a.penalties),1)
-
+
features = numpy.vstack([hpen , dpen , apen , mmatrix[:]])
- AlignmentScores[pathNr] = (Weights[pathNr,:] * features)[0,0]
+ AlignmentScores[pathNr+1] = (allWeights[:,pathNr+1].T * features)[0,0]
# Check wether scalar product + loss equals viterbi score
- # assert math.fabs(newAlignmentScores[pathNr] - AlignmentScores[pathNr]) < 1e-6, 'Scalar prod + loss is not equal Viterbi score'
-
- # if norm(Weights(1,:)-Weights(pathNr+1,:))<1e-5,
- # true_map(pathNr+1)=1
- #
- # # the true label sequence should not have a larger score than the
- # # maximal one WHYYYYY?
- # if AlignmentScores(1) >= max(AlignmentScores(2:end))+1e-6,
- # AlignmentScores
- # warning('true score > max score\n') ;
- # keyboard
- # end ;
-
- # if all(true_map==1)
- # num_path[exampleIdx]=num_path[exampleIdx]+1 ; %next iteration step: one alignment more
- # end ;
-
- # # Choose true and first false alignment for extending A
- # Weights = Weights([1 min(find(true_map==0))], :) ;
+ #assert math.fabs(newAlignmentScores[pathNr] - AlignmentScores[pathNr+1]) < 1e-6,\
+ #'Scalar prod + loss is not equal Viterbi score. Respective values are %f, %f' % \
+ #(newAlignmentScores[pathNr],AlignmentScores[pathNr+1])
+
+ # # if the pathNr-best alignment is very close to the true alignment consider it as true
+ if norm( allWeights[:,0] - allWeights[:,pathNr+1] ) < 1e-5:
+ true_map[pathNr+1] = 1
+
+ # # the true label sequence should not have a larger score than the
+ # # maximal one WHYYYYY?
+
+ # # this means that all n-best paths are to close to each other
+ # # we have to extend the n-best search to a (n+1)-best
+ if len([elem for elem in true_map if elem == 1]) == len(true_map):
+ num_path[exampleIdx] = num_path[exampleIdx]+1
+
+ # Choose true and first false alignment for extending A
+ firstFalseIdx = -1
+ for map_idx,elem in enumerate(true_map):
+ if elem == 0:
+ firstFalseIdx = map_idx
+ break
+
+ # if there is at least one useful false alignment add the
+ # corresponding constraints to the optimization problem
+ if firstFalseIdx != -1:
+ trueWeights = allWeights[:,0]
+ firstFalseWeights = allWeights[:,firstFalseIdx]
- # # if there is a false alignment
# if size(Weights,1)>1 & sum((Weights(1,:)-Weights(2,:)).*param)+xis[exampleIdx]<1+column_eps,
# e=zeros(1,N) ;
# e[exampleIdx] = 1 ;
# gap[exampleIdx] = 0 ;
# end ;
- # LMM.py code:
- # deltas = Weights(2,:)-Weights(1,:) zeros(1,126) -e
- # const_added = solver.addConstraint(deltas, idx)
- #
- # objValue,w,self.slacks = solver.solve()
+ # LMM.py code:
+ deltas = firstFalseWeights - trueWeights
+ const_added = solver.addConstraint(deltas, exampleIdx)
+
+ objValue,w,self.slacks = solver.solve()
- # sum_xis = 0
- # for elem in self.slacks:
- # sum_xis += elem
+ sum_xis = 0
+ for elem in self.slacks:
+ sum_xis += elem
- if exampleIdx== 10:
+ if exampleIdx==0:
break
iteration_nr += 1