+ found index bug in SWIG interface
authorfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Fri, 7 Dec 2007 16:53:09 +0000 (16:53 +0000)
committerfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Fri, 7 Dec 2007 16:53:09 +0000 (16:53 +0000)
+ added more checks
+ adapted SIQP_CPX solver from the RFP project to work with QPalma
TODO
- check newWeights -> still a +2 offset at some positions

git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@6945 e1793c9e-67f9-0310-80fc-b846ff1f7b36

python/.qpalma.py.swp
python/Configuration.py [new file with mode: 0644]
python/SIQP.py
python/SIQP_CPX.py
python/computeSpliceWeights.py
python/qpalma.py

index 8deaa99..7e4cd26 100644 (file)
Binary files a/python/.qpalma.py.swp and b/python/.qpalma.py.swp differ
diff --git a/python/Configuration.py b/python/Configuration.py
new file mode 100644 (file)
index 0000000..187bc9e
--- /dev/null
@@ -0,0 +1,32 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import numpy.matlib
+
+fixedParam = numpy.matlib.mat([[ 0.62870709], [ 0.7012026 ], [ 0.60236784],
+       [ 0.51316259], [ 0.20220814], [ 0.70324863], [ 0.37218684], [ 0.82178927],
+       [ 0.60394866], [ 0.70371272], [ 0.07548074], [ 0.63412803], [ 0.97442266],
+       [ 0.13216791], [ 0.71041168], [ 0.2093887 ], [ 0.35227344], [ 0.3405142 ],
+       [ 0.69677236], [ 0.41673747], [ 0.564245  ], [ 0.37613432], [ 0.88805642],
+       [ 0.88691608], [ 0.69476752], [ 0.81659504], [ 0.17801859], [ 0.71048235],
+       [ 0.08188783], [ 0.54884803], [ 0.84039558], [ 0.6982093 ], [ 0.41686176],
+       [ 0.38568873], [ 0.29401347], [ 0.12704074], [ 0.30640858], [ 0.89578031],
+       [ 0.84621571], [ 0.11783439], [ 0.0944695 ], [ 0.34081575], [ 0.44157643],
+       [ 0.77847185], [ 0.04283567], [ 0.45107823], [ 0.89789891], [ 0.41045519],
+       [ 0.49073531], [ 0.29727627], [ 0.94711483], [ 0.24898204], [ 0.26181212],
+       [ 0.71760957], [ 0.60326883], [ 0.80887576], [ 0.09448718], [ 0.88064525],
+       [ 0.84317654], [ 0.48893703], [ 0.24847021], [ 0.84203596], [ 0.34104156],
+       [ 0.75604701], [ 0.91703057], [ 0.69325475], [ 0.61276969], [ 0.16335226],
+       [ 0.4684374 ], [ 0.16553371], [ 0.79594434], [ 0.6440283 ], [ 0.80922237],
+       [ 0.5349296 ], [ 0.31924316], [ 0.10960695], [ 0.40151062], [ 0.50473641],
+       [ 0.14812671], [ 0.73523169], [ 0.35141625], [ 0.80364238], [ 0.02128181],
+       [ 0.0061226 ], [ 0.34541924], [ 0.07694485], [ 0.05551339], [ 0.23087636],
+       [ 0.87016395], [ 0.31682377], [ 0.27375113], [ 0.72226332], [ 0.62914149],
+       [ 0.59236012], [ 0.2070238 ], [ 0.52390942], [ 0.11894098], [ 0.55725917],
+       [ 0.72706009], [ 0.087196  ], [ 0.04745082], [ 0.95636492], [ 0.31524576],
+       [ 0.79685218], [ 0.80386771], [ 0.70942604], [ 0.82869417], [ 0.26906569],
+       [ 0.51848039], [ 0.64169354], [ 0.07114973], [ 0.39249454], [ 0.07002803],
+       [ 0.94667567], [ 0.02252752], [ 0.01039039], [ 0.5721312 ], [ 0.06065969],
+       [ 0.69422476], [ 0.4310939 ], [ 0.03069099], [ 0.35969779], [ 0.18047331],
+       [ 0.4177651 ], [ 0.01360547], [ 0.29069319]])
+
index d8ada7e..f5008d9 100644 (file)
@@ -70,129 +70,6 @@ class SIQP:
       self.dimP = self.numFeatures + self.numExamples
       self.createRegularizer()
 
-   def parseParameterFile(self):
-      paramFile = 'Parameters.h'
-      paramFileLoc = os.path.join('/fml/ag-raetsch/home/fabio/projects/rfp_current',paramFile)
-
-      self.featMap = { 
-         'BASE_PAIR':0,
-         'HELIX_CLOSING':1,
-         'STACKING_PAIR':2,
-         'STACKING_SCORE':3,
-         'TERM_MIS':4, 
-         'HAIRPIN':5, 
-         'BULGE':6, 
-         'INTERNAL_LENGTH':7,
-         'INTERNAL_FULL':8,
-         'INTERNAL_ASYM':9,
-         'SINGLE_LEFT':10, 
-         'SINGLE_RIGHT':11 }
-
-      # ('STACKING_PAIR_AAAA','STACKING_PAIR_UUUU')
-      self.start_stops = [
-      ('BASE_PAIR_AA','BASE_PAIR_UU'), 
-      ('HELIX_CLOSING_AA','HELIX_CLOSING_UU'), 
-      ('STACKING_PAIR_AAAA','STACKING_PAIR_UUUU'),
-      ('STACKING_1_SCORE','STACKING_5_SCORE'), 
-      ('TERMINAL_MISMATCH_AAAA','TERMINAL_MISMATCH_UUUU'),
-      ('HAIRPIN_LOOP_1','HAIRPIN_LOOP_30'),
-      ('BULGE_LOOP_1','BULGE_LOOP_30'),('INTERIOR_LOOP_1','INTERIOR_LOOP_30'),
-      ('INTERNAL_LENGTH_1','INTERNAL_LENGTH_30'),
-      ('INTERNAL_FULL_1_1','INTERNAL_FULL_15_15'),
-      ('INTERNAL_ASYMMETRY_1','INTERNAL_ASYMMETRY_30'),
-      ('SINGLE_BASE_PAIR_STACKING_LEFT_AAA','SINGLE_BASE_PAIR_STACKING_LEFT_UUU'),
-      ('SINGLE_BASE_PAIR_STACKING_RIGHT_AAA','SINGLE_BASE_PAIR_STACKING_RIGHT_UUU') ]
-
-      self.param_pos = [(0,0)]*len(self.featMap)
-   
-      for line in open(paramFileLoc):
-         if line.startswith('#define') and not line.startswith('#define _'):
-            line = line.replace('#define','').strip()
-            name,value = line.split(' ')
-            value = int(value)
-
-            for key,id in self.featMap.items():
-               if name == self.start_stops[id][0]:
-                  self.param_pos[id] = (value,0)
-               if name == self.start_stops[id][1]:
-                  a,b = self.param_pos[id] 
-                  self.param_pos[id] = (a,value)
-
-      self.param_ranges = [(0,0)]*len(self.featMap)
-      self.param_coeff = [1.0]*len(self.featMap)
-
-      self.param_coeff = [
-            0.33, 2.5, 1.0, 1.0,
-            1.5, 0.5, 5.0, 3.0,
-            2.5, 5.0, 2.5, 2.0]
-
-      for key,id in self.featMap.items():
-         self.param_ranges[id] = range(self.param_pos[id][0],self.param_pos[id][1])
-
-   def regularizeTogether(self,pos1,pos2):
-      self.P[pos1,pos2] = -1.0 
-      self.P[pos2,pos1] = -1.0
-      #self.P[pos1,pos1] += 1.0
-
-   def createRNARegularizer(self):
-      self.parseParameterFile()
-
-      self.loopRanges = [ self.param_ranges[self.featMap['STACKING_SCORE']], self.param_ranges[self.featMap['HAIRPIN']],
-      self.param_ranges[self.featMap['BULGE']],self.param_ranges[self.featMap['INTERNAL_LENGTH']]]
-
-      self.P = cb.matrix(0.0,(self.dimP,self.dimP))
-
-      # first create unit matrix (only upper block)
-      for i in range(self.numFeatures):
-         self.P[i,i] = 1.0
-
-      # now add terms for loop smoothness constraints
-      #for currentRange in self.loopRanges:
-      #   for i in currentRange:
-      #      self.P[i,i+1] = -1.0 
-      #      self.P[i+1,i] = -1.0
-      #      self.P[i,i] += 1.0
-
-      b0,b1,b2,b3 = 1,4,16,64
-      posList = []
-      
-      for i in range(4):
-         for j in range(4):
-            for k in range(4):
-               for l in range(4):
-                  
-                  index1 = i*b3 + j*b2 + k*b1 + l*b0
-                  index2 = l*b3 + k*b2 + j*b1 + i*b0
-
-                  if index1 > index2:
-                     continue;
-
-                  posList.append((i,j,k,l))
-
-      stacking_start = (self.param_pos[self.featMap['STACKING_PAIR']])[0]
-
-      for pos1,elem1 in enumerate(posList):
-         for pos2,elem2 in enumerate(posList):
-            if elem1 == elem2:
-               continue
-
-            i,j,k,l = elem1
-            ip,jp,kp,lp = elem2
-
-            if (k,l,i,j) == (ip,jp,kp,lp):
-               print stacking_start+pos1,stacking_start+pos2
-               #self.regularizeTogether(stacking_start+pos1,stacking_start+pos2)
-
-      # Scale regularization terms
-      for key,id in self.featMap.items():
-         currentRange = self.param_ranges[id]
-         currentCoeff = self.param_coeff[id]
-
-         self.P[currentRange,currentRange] *= currentCoeff
-
-      for i in range(self.numFeatures,self.dimP):
-         assert self.P[i,i] == 0.0
-
    def createUnitRegularizer(self):
       # set regularizer to zero
       self.P = cb.matrix(0.0,(self.dimP,self.dimP))
@@ -202,7 +79,6 @@ class SIQP:
 
    def createRegularizer(self):
       self.createUnitRegularizer()
-      #self.createRNARegularizer()
 
       q_array = [0.0]*self.numFeatures + [1.0]*self.numExamples
       self.q = cb.matrix(q_array,(self.numFeatures+self.numExamples,1),'d')
index 6fe225a..b6635f6 100644 (file)
@@ -109,38 +109,23 @@ class SIQPSolver(SIQP):
       self.qmatbeg, self.qmatcnt, self.qmatind, self.qmatval = self.cpx_matrix(self.P)
       CPX.copyquad(self.env, self.lp, self.qmatbeg, self.qmatcnt, self.qmatind, self.qmatval)
 
-   def addConstraint(self, energy_deltas, loss, example_idx, useMarginRescaling):
+   def addConstraint(self, energy_deltas, example_idx):
+   #def addConstraint(self, energy_deltas, loss, example_idx, useMarginRescaling):
       """
       This method adds one contraint to the optimization problem.
       """
       self.lastDelta = energy_deltas
-      self.lastLoss = loss
       self.lastIndex = example_idx
 
-      assert loss >= 0.0, 'Loss has to be non-negative!'
       assert -1 < example_idx and example_idx < self.numExamples
 
-      #if loss < self.EPSILON:
-      #    return False
-
-      #if self.old_w != None:
-      #   scalar_prod = self.old_w[0:self.numFeatures,0].trans()  * energy_deltas
-      #   old_slack = self.old_w[self.numFeatures+example_idx,0]
-      #   if useMarginRescaling:
-      #       if scalar_prod[0] + old_slack > loss: # leave some room for inaccuracies
-      #           print 'Current example already fulfills constraint.'
-      #           print >> self.protocol,  'Current example already fulfills constraint.'
-      #           return False
-
-      # printing out respective loss resp. scalar prod contributions
       if self.old_w != None:
-         print >> self.protocol, 'Loss contrib is %d'%loss
          print >> self.protocol, 'Scalar prod. contrib is %f'%((self.old_w[0:self.numFeatures].T*energy_deltas)[0])
 
       numNewRows = 1
       numNewCols = 0
    
-      row_rhs = FloatMatrix([loss]).data
+      row_rhs = FloatMatrix([1.0]).data
       row_sense = CharMatrix(['G']).data
 
       row_matbeg = IntMatrix([0]).data
@@ -210,7 +195,7 @@ class SIQPSolver(SIQP):
             sumOfSlacks += new_w[self.numFeatures+i,0]
 
          scalarProdDiff = (new_w[0:self.numFeatures,0].T * self.lastDelta)[0]
-         print >> self.protocol, 'Constraint: %f >= %d - %f'%(scalarProdDiff,self.lastLoss,new_w[self.numFeatures+self.lastIndex])
+         print >> self.protocol, 'Constraint: %f >= %d - %f'%(scalarProdDiff,self.lastLoss,new_w[self.numFeatures+self.lastIndex])
 
          # evalute parts of the objective
          regularizationPart = (0.5 *new_w.T*self.P*new_w)[0]
index 4ffe143..fcddbb4 100644 (file)
@@ -41,8 +41,8 @@ def calculateWeights(plf, scores):
          currentWeight[Upper] = currentWeight[Upper] + weightup
          currentWeight[Lower] = currentWeight[Lower] + weightlow
 
-         print value,plf.limits[Lower],plf.limits[Upper]
-         print weightup,weightlow,currentWeight[Upper],currentWeight[Lower]
+         #print value,plf.limits[Lower],plf.limits[Upper]
+         #print weightup,weightlow,currentWeight[Upper],currentWeight[Lower]
 
    return currentWeight
 
index b6333ad..d974747 100644 (file)
@@ -13,6 +13,7 @@ import scipy.io
 import pdb
 
 from numpy.matlib import mat,zeros,ones,inf
+from numpy.linalg import norm
 
 import QPalmaDP
 
@@ -127,7 +128,7 @@ class QPalma:
                Donors[i] = 1
 
       # Initialize solver 
-      #problem = SIQPSolver(numFeatures,numExamples,C,logfile)
+      solver = SIQPSolver(self.numFeatures,self.numExamples,self.C,self.logfh)
 
       # stores the number of alignments done for each example (best path, second-best path etc.)
       num_path = [anzpath]*N 
@@ -164,8 +165,8 @@ class QPalma:
             # Calculate the weights
             trueWeightDon, trueWeightAcc, trueWeightIntron = computeSpliceWeights(d, a, h, trueSpliceAlign, don_supp, acc_supp)
 
-            # Reshape currentW param 
-            currentW = self.createParameterVector(trueWeightDon, trueWeightAcc, trueWeightIntron, trueWeightMatch, qualityMatrix)
+            #trueWeight = self.createParameterVector(trueWeightDon, trueWeightAcc, trueWeightIntron, trueWeightMatch, qualityMatrix)
+            trueWeight = numpy.vstack([trueWeightIntron, trueWeightDon, trueWeightAcc, trueWeightMatch, qualityMatrix ])
 
             #currentPhi = createFeatureVector()
 
@@ -177,11 +178,18 @@ class QPalma:
             currentPhi[126:]   = qualityMatrix[:]
 
             # Calculate w'phi(x,y) the total score of the alignment
-            trueAlignmentScore = currentW.T * currentPhi
+            trueAlignmentScore = (trueWeight.T * currentPhi)[0,0]
 
+            # The allWeights vector is supposed to store the weight parameter
+            # of the true alignment as well as the weight parameters of the
+            # num_path[exampleIdx] other alignments
+            allWeights = zeros((self.numFeatures,num_path[exampleIdx]+1))
+            allWeights[:,0] = trueWeight[:,0]
 
-            ################## Calculate wrong alignment(s) ######################
+            AlignmentScores = [0.0]*(num_path[exampleIdx]+1)
+            AlignmentScores[0] = trueAlignmentScore
 
+            ################## Calculate wrong alignment(s) ######################
 
             # Compute donor, acceptor with penalty_lookup_new
             # returns two double lists
@@ -231,9 +239,17 @@ class QPalma:
 
             for i in range(mm_len*num_path[exampleIdx]):
                weightMatch[i] = newWeightMatch[i]
+
+            for i in range(num_path[exampleIdx]):
+               AlignmentScores[i+1] = newAlignmentScores[i]
                
             spliceAlign = spliceAlign.reshape(num_path[exampleIdx],dna_len)
             weightMatch = weightMatch.reshape(num_path[exampleIdx],mm_len)
+             
+            #pdb.set_trace()
+            #print spliceAlign[:,:]
+            #print weightMatch[:,:]
+            #print AlignmentScores
 
             # Calculate weights of the respective alignments Note that we are
             # calculating n-best alignments without any hamming loss, so we
@@ -242,12 +258,9 @@ class QPalma:
             # constraints. To keep track of the true and false alignments we
             # define an array true_map with a boolean indicating the
             # equivalence to the true alignment for each decoded alignment.
-            true_map = zeros((1,1+num_path[exampleIdx]))
+            true_map = [0]*(num_path[exampleIdx]+1)
             true_map[0] = 1
-            path_loss = [0]*num_path[exampleIdx]
-
-            AlignmentScores = [0.0]*num_path[exampleIdx]
-            Weights = zeros((num_path[exampleIdx],self.numFeatures))
+            path_loss = [0]*(num_path[exampleIdx]+1)
 
             for pathNr in range(num_path[exampleIdx]):
                #dna_numbers = dnaest{1,pathNr}
@@ -258,43 +271,55 @@ class QPalma:
                # sum up positionwise loss between alignments
                for alignPosIdx in range(len(spliceAlign[pathNr,:])):
                   if spliceAlign[pathNr,alignPosIdx] != trueSpliceAlign[alignPosIdx]:
-                     path_loss[pathNr] += 1
+                     path_loss[pathNr+1] += 1
 
                # Gewichte in restliche Zeilen der Matrix speichern
                # Weights[pathNr,:] = [weightIntron, weightDon, weightAcc, weightMatch(pathNr,:)]
-               wp = self.createParameterVector(weightIntron, weightDon, weightAcc, weightMatch[pathNr,:], qualityMatrix)
-               Weights[pathNr,:] = wp.T
+               wp = numpy.vstack([weightIntron, weightDon, weightAcc, weightMatch[pathNr,:].T, qualityMatrix ])
+               allWeights[:,pathNr+1] = wp
+
+            #print allWeights[:,0]
+            #print allWeights[:,1]
+            #print allWeights[:,2]
 
                hpen = mat(h.penalties).reshape(len(h.penalties),1)
                dpen = mat(d.penalties).reshape(len(d.penalties),1)
                apen = mat(a.penalties).reshape(len(a.penalties),1)
-   
+
                features = numpy.vstack([hpen , dpen , apen , mmatrix[:]])
 
-               AlignmentScores[pathNr] = (Weights[pathNr,:] * features)[0,0]
+               AlignmentScores[pathNr+1] = (allWeights[:,pathNr+1].T * features)[0,0]
 
                # Check wether scalar product + loss equals viterbi score
-               # assert math.fabs(newAlignmentScores[pathNr] - AlignmentScores[pathNr]) < 1e-6, 'Scalar prod + loss is not equal Viterbi score'
-
-            #   if norm(Weights(1,:)-Weights(pathNr+1,:))<1e-5,
-            #     true_map(pathNr+1)=1
-            #     
-            #    # the true label sequence should not have a larger score than the
-            #    # maximal one WHYYYYY?
-            #    if AlignmentScores(1) >= max(AlignmentScores(2:end))+1e-6,
-            #       AlignmentScores
-            #       warning('true score > max score\n') ;
-            #       keyboard
-            #    end ;
-
-            #    if all(true_map==1)
-            #       num_path[exampleIdx]=num_path[exampleIdx]+1 ; %next iteration step: one alignment more
-            #    end ;
-
-            #    # Choose true and first false alignment for extending A
-            #    Weights = Weights([1 min(find(true_map==0))], :) ; 
+               #assert math.fabs(newAlignmentScores[pathNr] - AlignmentScores[pathNr+1]) < 1e-6,\
+               #'Scalar prod + loss is not equal Viterbi score. Respective values are %f, %f' % \
+               #(newAlignmentScores[pathNr],AlignmentScores[pathNr+1])
+
+               #  # if the pathNr-best alignment is very close to the true alignment consider it as true
+               if norm( allWeights[:,0] - allWeights[:,pathNr+1] ) < 1e-5:
+                  true_map[pathNr+1] = 1
+
+               #  # the true label sequence should not have a larger score than the
+               #  # maximal one WHYYYYY?
+
+               #  # this means that all n-best paths are to close to each other 
+               #  # we have to extend the n-best search to a (n+1)-best
+               if len([elem for elem in true_map if elem == 1]) == len(true_map):
+                  num_path[exampleIdx] = num_path[exampleIdx]+1
+
+               # Choose true and first false alignment for extending A
+               firstFalseIdx = -1
+               for map_idx,elem in enumerate(true_map):
+                  if elem == 0:
+                     firstFalseIdx = map_idx
+                     break
+                 
+               # if there is at least one useful false alignment add the
+               # corresponding constraints to the optimization problem
+               if firstFalseIdx != -1:
+                  trueWeights        = allWeights[:,0]
+                  firstFalseWeights  = allWeights[:,firstFalseIdx]
 
-            #    # if there is a false alignment
             #    if size(Weights,1)>1 & sum((Weights(1,:)-Weights(2,:)).*param)+xis[exampleIdx]<1+column_eps,
             #       e=zeros(1,N) ; 
             #       e[exampleIdx] = 1 ;
@@ -305,17 +330,17 @@ class QPalma:
             #       gap[exampleIdx] = 0 ;
             #    end ;
 
-            # LMM.py code:
-            # deltas  = Weights(2,:)-Weights(1,:) zeros(1,126) -e
-            # const_added = solver.addConstraint(deltas, idx)
-            # 
-            # objValue,w,self.slacks = solver.solve()
+               # LMM.py code:
+               deltas  = firstFalseWeights - trueWeights
+               const_added = solver.addConstraint(deltas, exampleIdx)
+
+               objValue,w,self.slacks = solver.solve()
 
-            # sum_xis = 0
-            # for elem in self.slacks:
-            #    sum_xis +=  elem
+               sum_xis = 0
+               for elem in self.slacks:
+                  sum_xis +=  elem
 
-            if exampleIdx== 10:
+            if exampleIdx==0:
                break
 
          iteration_nr += 1