+ minor fixes
authorfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Thu, 24 Jan 2008 17:48:37 +0000 (17:48 +0000)
committerfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Thu, 24 Jan 2008 17:48:37 +0000 (17:48 +0000)
+ some checks

git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@7571 e1793c9e-67f9-0310-80fc-b846ff1f7b36

QPalmaDP/fill_matrix.cpp
qpalma/Configuration.py
qpalma/generateEvaluationData.py
scripts/qpalma_predict.py
scripts/qpalma_train.py

index 6030a56..1425ade 100644 (file)
@@ -98,6 +98,7 @@ double getScore(struct penalty_struct* qualityScores, int mlen, int dnaChar, int
                           (currentPen.limits[idx]-baseScore)) / (currentPen.limits[idx]-currentPen.limits[idx-1]) ;  
    }
 
+   //score = lookup_penalty(&currentPen,idx, NULL, 0);
    //if(baseScore <= currentPen.limits[0]) {
    //   score = currentPen.penalties[0];
    //} else {
index 408f517..4334ef9 100644 (file)
@@ -7,6 +7,72 @@ import os.path
 fixedParamQ = numpy.matlib.mat(
 [[ 0.62870709], [ 0.7012026 ], [ 0.60236784],
        [ 0.51316259], [ 0.20220814], [ 0.70324863], [ 0.37218684], [ 0.82178927],
+       [ 0.51316259], [ 0.20220814], [ 0.70324863], [ 0.37218684], [ 0.82178927],
+       [ 0.60394866], [ 0.70371272], [ 0.07548074], [ 0.63412803], [ 0.97442266],
+       [ 0.13216791], [ 0.71041168], [ 0.2093887 ], [ 0.35227344], [ 0.3405142 ],
+       [ 0.69677236], [ 0.41673747], [ 0.564245  ], [ 0.37613432], [ 0.88805642],
+       [ 0.69677236], [ 0.41673747], [ 0.564245  ], [ 0.37613432], [ 0.88805642],
+       [ 0.69677236], [ 0.41673747], [ 0.564245  ], [ 0.37613432], [ 0.88805642],
+       [ 0.88691608], [ 0.69476752], [ 0.81659504], [ 0.17801859], [ 0.71048235],
+       [ 0.08188783], [ 0.54884803], [ 0.84039558], [ 0.6982093 ], [ 0.41686176],
+       [ 0.38568873], [ 0.29401347], [ 0.12704074], [ 0.30640858], [ 0.89578031],
+       [ 0.84621571], [ 0.11783439], [ 0.0944695 ], [ 0.34081575], [ 0.44157643],
+       [ 0.77847185], [ 0.04283567], [ 0.45107823], [ 0.89789891], [ 0.41045519],
+       [ 0.49073531], [ 0.29727627], [ 0.94711483], [ 0.24898204], [ 0.26181212],
+       [ 0.71760957], [ 0.60326883], [ 0.80887576], [ 0.09448718], [ 0.88064525],
+       [ 0.84317654], [ 0.48893703], [ 0.24847021], [ 0.84203596], [ 0.34104156],
+       [ 0.75604701], [ 0.91703057], [ 0.69325475], [ 0.61276969], [ 0.16335226],
+       [ 0.4684374 ], [ 0.16553371], [ 0.79594434], [ 0.6440283 ], [ 0.80922237],
+       [ 0.5349296 ], [ 0.31924316], [ 0.10960695], [ 0.40151062], [ 0.50473641],
+       [ 0.14812671], [ 0.73523169], [ 0.35141625], [ 0.80364238], [ 0.02128181],
+       [ 0.0061226 ], [ 0.34541924], [ 0.07694485], [ 0.05551339], [ 0.23087636],
+       [ 0.87016395], [ 0.31682377], [ 0.27375113], [ 0.72226332], [ 0.62914149],
+       [ 0.59236012], [ 0.2070238 ], [ 0.52390942], [ 0.11894098], [ 0.55725917],
+       [ 0.72706009], [ 0.087196  ], [ 0.04745082], [ 0.95636492], [ 0.31524576],
+       [ 0.79685218], [ 0.80386771], [ 0.70942604], [ 0.82869417], [ 0.26906569],
+       [ 0.51848039], [ 0.64169354], [ 0.07114973], [ 0.39249454], [ 0.07002803],
+       [ 0.94667567], [ 0.02252752], [ 0.01039039], [ 0.5721312 ], [ 0.06065969],
+       [ 0.88691608], [ 0.69476752], [ 0.81659504], [ 0.17801859], [ 0.71048235],
+       [ 0.88691608], [ 0.69476752], [ 0.81659504], [ 0.17801859], [ 0.71048235],
+       [ 0.08188783], [ 0.54884803], [ 0.84039558], [ 0.6982093 ], [ 0.41686176],
+       [ 0.38568873], [ 0.29401347], [ 0.12704074], [ 0.30640858], [ 0.89578031],
+       [ 0.84621571], [ 0.11783439], [ 0.0944695 ], [ 0.34081575], [ 0.44157643],
+       [ 0.77847185], [ 0.04283567], [ 0.45107823], [ 0.89789891], [ 0.41045519],
+       [ 0.49073531], [ 0.29727627], [ 0.94711483], [ 0.24898204], [ 0.26181212],
+       [ 0.71760957], [ 0.60326883], [ 0.80887576], [ 0.09448718], [ 0.88064525],
+       [ 0.84317654], [ 0.48893703], [ 0.24847021], [ 0.84203596], [ 0.34104156],
+       [ 0.75604701], [ 0.91703057], [ 0.69325475], [ 0.61276969], [ 0.16335226],
+       [ 0.4684374 ], [ 0.16553371], [ 0.79594434], [ 0.6440283 ], [ 0.80922237],
+       [ 0.5349296 ], [ 0.31924316], [ 0.10960695], [ 0.40151062], [ 0.50473641],
+       [ 0.14812671], [ 0.73523169], [ 0.35141625], [ 0.80364238], [ 0.02128181],
+       [ 0.0061226 ], [ 0.34541924], [ 0.07694485], [ 0.05551339], [ 0.23087636],
+       [ 0.87016395], [ 0.31682377], [ 0.27375113], [ 0.72226332], [ 0.62914149],
+       [ 0.59236012], [ 0.2070238 ], [ 0.52390942], [ 0.11894098], [ 0.55725917],
+       [ 0.72706009], [ 0.087196  ], [ 0.04745082], [ 0.95636492], [ 0.31524576],
+       [ 0.79685218], [ 0.80386771], [ 0.70942604], [ 0.82869417], [ 0.26906569],
+       [ 0.51848039], [ 0.64169354], [ 0.07114973], [ 0.39249454], [ 0.07002803],
+       [ 0.94667567], [ 0.02252752], [ 0.01039039], [ 0.5721312 ], [ 0.06065969],
+       [ 0.88691608], [ 0.69476752], [ 0.81659504], [ 0.17801859], [ 0.71048235],
+       [ 0.08188783], [ 0.54884803], [ 0.84039558], [ 0.6982093 ], [ 0.41686176],
+       [ 0.38568873], [ 0.29401347], [ 0.12704074], [ 0.30640858], [ 0.89578031],
+       [ 0.84621571], [ 0.11783439], [ 0.0944695 ], [ 0.34081575], [ 0.44157643],
+       [ 0.84621571], [ 0.11783439], [ 0.0944695 ], [ 0.34081575], [ 0.44157643],
+       [ 0.77847185], [ 0.04283567], [ 0.45107823], [ 0.89789891], [ 0.41045519],
+       [ 0.49073531], [ 0.29727627], [ 0.94711483], [ 0.24898204], [ 0.26181212],
+       [ 0.71760957], [ 0.60326883], [ 0.80887576], [ 0.09448718], [ 0.88064525],
+       [ 0.84317654], [ 0.48893703], [ 0.24847021], [ 0.84203596], [ 0.34104156],
+       [ 0.75604701], [ 0.91703057], [ 0.69325475], [ 0.61276969], [ 0.16335226],
+       [ 0.4684374 ], [ 0.16553371], [ 0.79594434], [ 0.6440283 ], [ 0.80922237],
+       [ 0.5349296 ], [ 0.31924316], [ 0.10960695], [ 0.40151062], [ 0.50473641],
+       [ 0.14812671], [ 0.73523169], [ 0.35141625], [ 0.80364238], [ 0.02128181],
+       [ 0.0061226 ], [ 0.34541924], [ 0.07694485], [ 0.05551339], [ 0.23087636],
+       [ 0.87016395], [ 0.31682377], [ 0.27375113], [ 0.72226332], [ 0.62914149],
+       [ 0.59236012], [ 0.2070238 ], [ 0.52390942], [ 0.11894098], [ 0.55725917],
+       [ 0.72706009], [ 0.087196  ], [ 0.04745082], [ 0.95636492], [ 0.31524576],
+       [ 0.79685218], [ 0.80386771], [ 0.70942604], [ 0.82869417], [ 0.26906569],
+       [ 0.51848039], [ 0.64169354], [ 0.07114973], [ 0.39249454], [ 0.07002803],
+       [ 0.94667567], [ 0.02252752], [ 0.01039039], [ 0.5721312 ], [ 0.06065969],
+       [ 0.69422476], [ 0.4310939 ], [ 0.03069099], [ 0.35969779], [ 0.18047331],
        [ 0.60394866], [ 0.70371272], [ 0.07548074], [ 0.63412803], [ 0.97442266],
        [ 0.13216791], [ 0.71041168], [ 0.2093887 ], [ 0.35227344], [ 0.3405142 ],
        [ 0.69677236], [ 0.41673747], [ 0.564245  ], [ 0.37613432], [ 0.88805642],
@@ -109,7 +175,7 @@ fixedParam = numpy.matlib.mat([[ 0.62870709], [ 0.7012026 ], [ 0.60236784],
 #
 #
 
-C = 1.0
+C = 1000.0
 
 # 'normal' means work like Palma 'using_quality_scores' means work like Palma
 # plus using sequencing quality scores
@@ -152,7 +218,7 @@ mode = 'using_quality_scores'
 numDonSuppPoints     = 30
 numAccSuppPoints     = 30
 numLengthSuppPoints  = 30 
-numQualSuppPoints    = 2
+numQualSuppPoints    = 16
 
 min_qual = -1
 max_qual = 40
@@ -189,7 +255,6 @@ elif mode == 'using_quality_scores':
 else:
    assert False, 'Wrong operation mode specified'
 
-
 #
 #
 #
@@ -197,7 +262,6 @@ dna_filename         = '/fml/ag-raetsch/share/projects/qpalma/solexa/allGenes.pi
 est_filename         = '/fml/ag-raetsch/share/projects/qpalma/solexa/filteredReads_recent'
 tair7_seq_filename   = '/fml/ag-raetsch/share/projects/qpalma/solexa/tair7_seq.pickle'
 
-
 # Check for valid parameters
 assert numQualPlifs       >= 0
 assert numDonSuppPoints    > 1
index 4b5b7a0..6814855 100644 (file)
@@ -137,8 +137,6 @@ def loadArtificialData(numExamples):
 
    return s,a,d,e,est,q
 
-
-
 if __name__ == '__main__':
    Sequences, Acceptors, Donors, Exons, Ests, Qualities = generateData(10)
    print Acceptors
index d340a73..241410d 100644 (file)
@@ -70,18 +70,18 @@ class QPalma:
          use_quality_scores = False
 
       elif Conf.mode == 'using_quality_scores':
-         Sequences, Acceptors, Donors, Exons, Ests, Qualities, SplitPos = paths_load_data_solexa('training',None,self.ARGS)
-
-         end = 30
-         Sequences   = Sequences[:end]
-         Acceptors   = Acceptors[:end]
-         Donors      = Donors[:end]
-         Exons       = Exons[:end]
-         Ests        = Ests[:end]
-         Qualities   = Qualities[:end]
-         SplitPos    = SplitPos[:end]
-
-         #Sequences, Acceptors, Donors, Exons, Ests, Qualities = loadArtificialData(1000)
+         #Sequences, Acceptors, Donors, Exons, Ests, Qualities, SplitPos = paths_load_data_solexa('training',None,self.ARGS)
+         #end = 50
+         #Sequences   = Sequences[:end]
+         #Acceptors   = Acceptors[:end]
+         #Donors      = Donors[:end]
+         #Exons       = Exons[:end]
+         #Ests        = Ests[:end]
+         #Qualities   = Qualities[:end]
+         #SplitPos    = SplitPos[:end]
+
+         Sequences, Acceptors, Donors, Exons, Ests, Qualities = loadArtificialData(1000)
+         SplitPos = [1]*len(Qualities)
          #Sequences, Acceptors, Donors, Exons, Ests, Noises = paths_load_data_pickle('training',self.genome_info,self.ARGS)
          #pdb.set_trace()
          #Qualities = []
@@ -105,7 +105,7 @@ class QPalma:
 
       # Initialize parameter vector  / param = numpy.matlib.rand(126,1)
       #param_filename = '/fml/ag-raetsch/home/fabio/svn/projects/QPalma/python/elegans.param'
-      param_filename='/fml/ag-raetsch/home/fabio/svn/projects/QPalma/scripts/param_20.pickle'
+      param_filename='/fml/ag-raetsch/home/fabio/svn/projects/QPalma/scripts/param_30.pickle'
       param = load_param(param_filename)
 
       # Set the parameters such as limits penalties for the Plifs
@@ -309,27 +309,28 @@ class QPalma:
 
          #pdb.set_trace()
       
-         up_off,down_off = evaluateExample(dna,est,exons,newSpliceAlign,newEstAlign,currentSplitPos)
-         print up_off,down_off 
-
-         if up_off > -1:
-            total_up_off.append(up_off)
-            total_down_off.append(down_off)
-
-      total_up = 0
-      total_down = 0 
-      for idx in range(len(total_up_off)):
-         total_up    += total_up_off[idx]
-         total_down  += total_down_off[idx]
-         
-      total_up /= len(total_up_off)
-      total_down /= len(total_down_off)
-
-      print 'Mean up_off is %f' % total_up
-      print 'Mean down_off is %f' % total_down
-      #print total_up_off
-      #print total_down_off
-      print 'len is %d' % len(total_up_off)
+         #up_off,down_off = evaluateExample(dna,est,exons,newSpliceAlign,newEstAlign,currentSplitPos)
+         evaluateExample(dna,est,exons,newSpliceAlign,newEstAlign,currentSplitPos)
+         #print up_off,down_off 
+
+         #if up_off > -1:
+         #   total_up_off.append(up_off)
+         #   total_down_off.append(down_off)
+
+      #total_up = 0
+      #total_down = 0 
+      #for idx in range(len(total_up_off)):
+      #   total_up    += total_up_off[idx]
+      #   total_down  += total_down_off[idx]
+      #   
+      #total_up /= len(total_up_off)
+      #total_down /= len(total_down_off)
+
+      #print 'Mean up_off is %f' % total_up
+      #print 'Mean down_off is %f' % total_down
+      ##print total_up_off
+      ##print total_down_off
+      #print 'len is %d' % len(total_up_off)
 
       print 'Prediction completed'
       self.logfh.close()
@@ -400,26 +401,26 @@ def evaluateExample(dna,est,exons,SpliceAlign,newEstAlign,spos):
       else:
          oldElem = SpliceAlign[pos-1]
 
-      if oldElem != 0 and elem == 0:
-         newExons.append(pos)
-
-      if oldElem == 0 and elem != 0:
+      if oldElem == 2 and elem == 0: # start of exon
          newExons.append(pos-1)
 
+      if oldElem == 0 and elem == 1: # end of exon
+         newExons.append(pos)
+
    pdb.set_trace()
-   up_off   = -1
-   down_off = -1
+   #up_off   = -1
+   #down_off = -1
 
-   if len(newExons) != 4:
-      acc = 0.0
-   else:
-      e1_begin,e1_end = newExons[0],newExons[1]
-      e2_begin,e2_end = newExons[2],newExons[3]
+   #if len(newExons) != 4:
+   #   acc = 0.0
+   #else:
+   #   e1_begin,e1_end = newExons[0],newExons[1]
+   #   e2_begin,e2_end = newExons[2],newExons[3]
 
-      up_off   = int(math.fabs(e1_end - exons[0,1]))
-      down_off = int(math.fabs(e2_begin - exons[1,0]))
+   #   up_off   = int(math.fabs(e1_end - exons[0,1]))
+   #   down_off = int(math.fabs(e2_begin - exons[1,0]))
 
-   return up_off,down_off
+   #return up_off,down_off
 
 if __name__ == '__main__':
    qpalma = QPalma()
index 320acb4..244eb7f 100644 (file)
@@ -74,20 +74,18 @@ class QPalma:
 
          use_quality_scores = False
       elif Configuration.mode == 'using_quality_scores':
-         Sequences, Acceptors, Donors, Exons, Ests, Qualities, SplitPos = paths_load_data_solexa('training',None,self.ARGS)
+         #Sequences, Acceptors, Donors, Exons, Ests, Qualities, SplitPos = paths_load_data_solexa('training',None,self.ARGS)
 
-         end = 50
-         Sequences   = Sequences[:end]
-         Exons       = Exons[:end]
-         Ests        = Ests[:end]
-         Qualities   = Qualities[:end]
-         SplitPos    = SplitPos[:end]
+         #end = 50
+         #Sequences   = Sequences[:end]
+         #Exons       = Exons[:end]
+         #Ests        = Ests[:end]
+         #Qualities   = Qualities[:end]
+         #SplitPos    = SplitPos[:end]
 
-         Donors, Acceptors = getDonAccScores(Sequences)
-
-         #Sequences, Acceptors, Donors, Exons, Ests, Qualities = loadArtificialData(1000)
+         #Donors, Acceptors = getDonAccScores(Sequences)
 
-         #Sequences_, Acceptors_, Donors_, Exons_, Ests_, Qualities_ = generateData(100)
+         Sequences, Acceptors, Donors, Exons, Ests, Qualities = loadArtificialData(1000)
 
          #Sequences, Acceptors, Donors, Exons, Ests, Noises = paths_load_data_pickle('training',self.genome_info,self.ARGS)
          #Qualities = []
@@ -180,11 +178,11 @@ class QPalma:
             if exons[-1,1] > len(dna):
                continue
 
-            pdb.set_trace()
+            #pdb.set_trace()
             # Berechne die Parameter des wirklichen Alignments (but with untrained d,a,h ...)    
             trueSpliceAlign, trueWeightMatch, trueWeightQuality = computeSpliceAlignWithQuality(dna, exons)
             
-            print 'trueWeights' 
+            #print 'trueWeights' 
             # Calculate the weights
             trueWeightDon, trueWeightAcc, trueWeightIntron = computeSpliceWeights(d, a, h, trueSpliceAlign, don_supp, acc_supp)
             trueWeight = numpy.vstack([trueWeightIntron, trueWeightDon, trueWeightAcc, trueWeightMatch, trueWeightQuality])
@@ -195,6 +193,7 @@ class QPalma:
             currentPhi[donSP+accSP:donSP+accSP+lengthSP]                      = mat(h.penalties[:]).reshape(lengthSP,1)
             currentPhi[donSP+accSP+lengthSP:donSP+accSP+lengthSP+mmatrixSP]   = mmatrix[:]
 
+            #pdb.set_trace()
             if Configuration.mode == 'using_quality_scores':
                totalQualityPenalties = param[-totalQualSP:]
                currentPhi[donSP+accSP+lengthSP+mmatrixSP:]                    = totalQualityPenalties[:]
@@ -248,7 +247,6 @@ class QPalma:
             currentAlignment = QPalmaDP.Alignment(Configuration.numQualPlifs,Configuration.numQualSuppPoints, use_quality_scores)
 
             c_qualityPlifs = QPalmaDP.createPenaltyArrayFromList([elem.convert2SWIG() for elem in qualityPlifs])
-
             #print 'Calling myalign...'
             # calculates SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest
             currentAlignment.myalign( num_path[exampleIdx], dna, dna_len,\
@@ -325,7 +323,7 @@ class QPalma:
             path_loss   = [0]*(num_path[exampleIdx])
 
             for pathNr in range(num_path[exampleIdx]):
-               print 'decodedWeights' 
+               #print 'decodedWeights' 
                weightDon, weightAcc, weightIntron = computeSpliceWeights(d, a,
                h, newSpliceAlign[pathNr,:].flatten().tolist()[0], don_supp,
                acc_supp,True)