+ fixded index bug in feature count
authorfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Tue, 15 Jan 2008 17:04:49 +0000 (17:04 +0000)
committerfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Tue, 15 Jan 2008 17:04:49 +0000 (17:04 +0000)
+ now it works for all quality scores usually [-1,40]

git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@7363 e1793c9e-67f9-0310-80fc-b846ff1f7b36

QPalmaDP/result_align.cpp
python/Configuration.py
python/paths_load_data_solexa.py
python/qpalma.py
python/set_param_palma.py

index c98b36b..85778f8 100644 (file)
@@ -6,7 +6,8 @@ using namespace std;
 void increaseFeatureCount(penalty_struct* qparam, int dnanum, int estnum, double estprb) {
 
    //printf("Current index %d dna/est: %d %d\n",(estnum-1)*6+(dnanum),dnanum,estnum);
-   penalty_struct currentStruct = qparam[(estnum-1)*6+(dnanum)];
+   int currentPos = (estnum-1)*6+(dnanum);
+   penalty_struct currentStruct = qparam[currentPos];
 
       //printf("before\n");
       //int p_idx;
@@ -23,6 +24,7 @@ void increaseFeatureCount(penalty_struct* qparam, int dnanum, int estnum, double
    double value = estprb;
    int Lower = 0;
    int idx;
+
    for (idx=0;idx<currentStruct.len;idx++) {
       if (currentStruct.limits[idx] <= value)
          Lower++;
@@ -33,7 +35,7 @@ void increaseFeatureCount(penalty_struct* qparam, int dnanum, int estnum, double
          return;
    }
 
-   if (Lower == currentStruct.len) {
+   if (Lower == (currentStruct.len-1)) {
          currentStruct.penalties[currentStruct.len-1] += 1;
          return;
    }
@@ -48,7 +50,7 @@ void increaseFeatureCount(penalty_struct* qparam, int dnanum, int estnum, double
 
    //printf("estprb/Lower/Upper %f %d %d\n",estprb,Lower,Upper);
 
-   qparam[(estnum-1)*5+(dnanum-1)] = currentStruct;
+   qparam[currentPos] = currentStruct;
 
    //printf("after\n");
    //for(p_idx=0;p_idx<currentStruct.len;p_idx++) {
index 3bc75ef..91771cb 100644 (file)
@@ -3,7 +3,7 @@
 
 import numpy.matlib
 
-fixedParam = numpy.matlib.mat(
+fixedParamQ = numpy.matlib.mat(
 [[ 0.62870709], [ 0.7012026 ], [ 0.60236784],
        [ 0.51316259], [ 0.20220814], [ 0.70324863], [ 0.37218684], [ 0.82178927],
        [ 0.60394866], [ 0.70371272], [ 0.07548074], [ 0.63412803], [ 0.97442266],
@@ -108,13 +108,13 @@ fixedParam = numpy.matlib.mat([[ 0.62870709], [ 0.7012026 ], [ 0.60236784],
 #
 #
 
-C = 100.0
+C = 1.0
 
 # 'normal' means work like Palma 'using_quality_scores' means work like Palma
 # plus using sequencing quality scores
 
-mode = 'normal'
-#mode = 'using_quality_scores'
+#mode = 'normal'
+mode = 'using_quality_scores'
 
 # Here we specify the total number of parameters.
 
@@ -156,5 +156,9 @@ remove_duplicate_scores = False
 print_matrix            = False
 anzpath                 = 2
 
-fixedParam = fixedParam[:numFeatures]
-
+if mode == 'normal':
+   fixedParam = fixedParam[:numFeatures]
+elif mode == 'using_quality_scores':
+   fixedParam = fixedParamQ[:numFeatures]
+else:
+   assert False, 'Wrong operation mode specified'
index 33e6e25..a03f91e 100644 (file)
@@ -40,7 +40,8 @@ def paths_load_data_solexa(expt,genome_info,PAR):
 
       currentGene = allGenes[gene_id]
 
-      seq = seq.lower().replace('n','a')
+      #seq = seq.lower().replace('n','a')
+      seq = seq.lower()#.replace('n','a')
 
       if len(currentGene.exons) != 3:
          continue
index 9f47d33..1da4ec9 100644 (file)
@@ -71,6 +71,12 @@ class QPalma:
          use_quality_scores = False
       elif Configuration.mode == 'using_quality_scores':
          Sequences, Acceptors, Donors, Exons, Ests, Qualities = paths_load_data_solexa('training',self.genome_info,self.ARGS)
+
+         #Sequences, Acceptors, Donors, Exons, Ests, Noises = paths_load_data_pickle('training',self.genome_info,self.ARGS)
+         #Qualities = []
+         #for i in range(len(Ests)):
+         #   Qualities.append([40]*len(Ests[i]))
+
          use_quality_scores = True
       else:
          assert(False)
@@ -130,6 +136,8 @@ class QPalma:
       totalQualityPenalties = zeros((totalQualSP,1))
 
       iteration_nr = 0
+      param_idx = 0
+      const_added_ctr = 0
       while True:
          if iteration_nr == iteration_steps:
             break
@@ -148,6 +156,9 @@ class QPalma:
             if Configuration.mode == 'using_quality_scores':
                quality = Qualities[exampleIdx]
 
+            #print 'Current quality'
+            #print 'max/min: %d,%d' % (max(quality),min(quality))
+
             exons = Exons[exampleIdx] 
             # NoiseMatrix = Noises[exampleIdx] 
             don_supp = Donors[exampleIdx] 
@@ -181,8 +192,8 @@ class QPalma:
             AlignmentScores = [0.0]*(num_path[exampleIdx]+1)
             AlignmentScores[0] = trueAlignmentScore
 
-
             ################## Calculate wrong alignment(s) ######################
+            #print 'Generating MVs'
 
             # Compute donor, acceptor with penalty_lookup_new
             # returns two double lists
@@ -213,10 +224,13 @@ class QPalma:
             a_len = len(acceptor)
             acceptor = QPalmaDP.createDoubleArrayFromList(acceptor)
 
+            #print 'Creating alignment object with %d plifs each having %d support points' % (Configuration.numQualPlifs,Configuration.numQualSuppPoints)
             currentAlignment = QPalmaDP.Alignment(Configuration.numQualPlifs,Configuration.numQualSuppPoints)
 
             c_qualityPlifs = QPalmaDP.createPenaltyArrayFromList([elem.convert2SWIG() for elem in qualityPlifs])
 
+            #pdb.set_trace()
+
             # calculates SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest
             currentAlignment.myalign( num_path[exampleIdx], dna, dna_len,\
              est, est_len, prb, chastity, ps, matchmatrix, mm_len, donor, d_len,\
@@ -265,9 +279,10 @@ class QPalma:
                newQualityPlifsFeatures[i] = c_qualityPlifsFeatures[i]
 
             #  equals palma up to here
-            pdb.set_trace()
 
-            # print "Calling destructors"
+            #pdb.set_trace()
+
+            #print "Calling destructors"
             del c_SpliceAlign
             del c_EstAlign
             del c_WeightMatch
@@ -343,17 +358,18 @@ class QPalma:
                if firstFalseIdx != -1:
                   trueWeights       = allWeights[:,0]
                   firstFalseWeights = allWeights[:,firstFalseIdx]
-                  differenceVector  = firstFalseWeights - trueWeights
-                  pdb.set_trace()
+                  #differenceVector  = firstFalseWeights - trueWeights
+                  differenceVector  = trueWeights - firstFalseWeights
+                  #pdb.set_trace()
 
                   if not __debug__:
                      const_added = solver.addConstraint(differenceVector, exampleIdx)
-
+                     const_added_ctr += 1
                #
                # end of one example processing 
                #
 
-            # call solver every nth step
+            # call solver every nth example //added constraint
             if exampleIdx != 0 and exampleIdx % 10 == 0 and not __debug__:
                objValue,w,self.slacks = solver.solve()
       
@@ -366,8 +382,9 @@ class QPalma:
                for i in range(len(param)):
                   param[i] = w[i]
 
+               cPickle.dump(param,open('param_%d.pickle'%param_idx,'w+'))
+               param_idx += 1
                [h,d,a,mmatrix,qualityPlifs] = set_param_palma(param,self.ARGS.train_with_intronlengthinformation)
-
             #   break
          #break
 
index 694eb2e..5b7aaec 100644 (file)
@@ -124,7 +124,7 @@ def set_param_palma(param, train_with_intronlengthinformation,\
       currentPlif.limits    = linspace(Configuration.min_qual,Configuration.max_qual,Configuration.numQualSuppPoints) 
       begin                 = lengthSP+donSP+accSP+mmatrixSP+(idx*Configuration.numQualSuppPoints)
       end                   = lengthSP+donSP+accSP+mmatrixSP+((idx+1)*Configuration.numQualSuppPoints)
-      print begin,end
+      #print begin,end
       currentPlif.penalties = param[begin:end].flatten().tolist()[0]
       currentPlif.transform = '' 
       currentPlif.name      = 'q'