+ resolved some index bugs concerning the intron boundaries and splice site
authorfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Thu, 24 Jan 2008 14:23:12 +0000 (14:23 +0000)
committerfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Thu, 24 Jan 2008 14:23:12 +0000 (14:23 +0000)
scores

git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@7559 e1793c9e-67f9-0310-80fc-b846ff1f7b36

QPalmaDP/qpalma_dp.cpp
qpalma/Configuration.py
qpalma/DataProc.py
qpalma/computeSpliceWeights.py
qpalma/tools/splicesites.py
scripts/qpalma_train.py

index 0fe066f..7786745 100644 (file)
@@ -137,7 +137,6 @@ void Alignment::myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
   memset(alignmentscores, -1, nr_paths*sizeof(double)); //fills alignmentscores with zeros
   //printf("after memset...\n");
   
-
   qualityScoresAllPaths= new penalty_struct*[nr_paths];
 
   for (int z=0; z<nr_paths; z++) {
index 5aeac9b..408f517 100644 (file)
@@ -109,7 +109,7 @@ fixedParam = numpy.matlib.mat([[ 0.62870709], [ 0.7012026 ], [ 0.60236784],
 #
 #
 
-C = 100000.0
+C = 1.0
 
 # 'normal' means work like Palma 'using_quality_scores' means work like Palma
 # plus using sequencing quality scores
index 12058c0..18efe36 100644 (file)
@@ -98,6 +98,10 @@ def paths_load_data_solexa(expt,genome_info,PAR):
          #pdb.set_trace()
 
       #pdb.set_trace()
+
+      currentExons[0,1] += 1
+      #currentExons[1,0] += 1
+
       Sequences.append(currentSeq)
 
       currentSize = len(Sequences[-1])
index 59520be..1bfb21e 100644 (file)
@@ -53,12 +53,13 @@ def computeSpliceWeights(d, a, h, SpliceAlign, don_supp, acc_supp,dec=False):
 
    # Picke die Positionen raus, an denen eine Donorstelle ist
    try:
-      if dec:
-         DonorScores = [elem for pos,elem in enumerate(don_supp) if pos > 0 and SpliceAlign[pos] == 1]
-      else:
-         DonorScores = [elem for pos,elem in enumerate(don_supp) if pos > 0 and SpliceAlign[pos-1] == 1]
+      #if dec:
+      DonorScores = [elem for pos,elem in enumerate(don_supp) if SpliceAlign[pos] == 1]
+      #else:
+      #   DonorScores = [elem for pos,elem in enumerate(don_supp) if pos > 0 and SpliceAlign[pos-1] == 1]
       assert not ( -inf in DonorScores )
    except:
+      print 'Error'
       pdb.set_trace()
 
    #print 'donor'
@@ -70,12 +71,14 @@ def computeSpliceWeights(d, a, h, SpliceAlign, don_supp, acc_supp,dec=False):
 
    #Den Vektor Acceptorstellen durchgehen und die Gewichtsvektoren belasten:
    try:
-      if dec:
-         AcceptorScores = [elem for pos,elem in enumerate(acc_supp) if pos > 0 and SpliceAlign[pos-1] == 2]
-      else:
-         AcceptorScores = [elem for pos,elem in enumerate(acc_supp) if pos > 0 and SpliceAlign[pos] == 2]
+      #if dec:
+      #AcceptorScores = [elem for pos,elem in enumerate(acc_supp) if pos > 0 and SpliceAlign[pos-1] == 2]
+      AcceptorScores = [elem for pos,elem in enumerate(acc_supp) if pos > 0 and SpliceAlign[pos-1] == 2]
+      #else:
+      #   AcceptorScores = [elem for pos,elem in enumerate(acc_supp) if pos > 0 and SpliceAlign[pos] == 2]
       assert not ( -inf in AcceptorScores )
    except:
+      print 'Error'
       pdb.set_trace()
 
    #print 'acceptor'
index 6e0a4a3..ac64a9d 100644 (file)
@@ -377,6 +377,8 @@ def getDonAccScores(Sequences):
         #print str(exampleIdx)
         (don_score,acc_score) = ss.compute_donacc(splicescores,str(exampleIdx))
         Donors.append(don_score)
+        acc_score = [acc_score[0]] + acc_score
+        acc_score = acc_score[:-1]
         Acceptors.append(acc_score)
         print str((max(don_score),min(numpy.where(numpy.isinf(don_score),100,don_score))))
         print str((max(acc_score),min(numpy.where(numpy.isinf(acc_score),100,acc_score))))
index 92f9021..320acb4 100644 (file)
@@ -69,11 +69,14 @@ class QPalma:
       if Configuration.mode == 'normal':
          #Sequences, Acceptors, Donors, Exons, Ests, Noises = paths_load_data_pickle('training',self.genome_info,self.ARGS)
          Sequences, Acceptors, Donors, Exons, Ests, Qualities = loadArtificialData(1000)
+
+         Donors, Acceptors = getDonAccScores(Sequences)
+
          use_quality_scores = False
       elif Configuration.mode == 'using_quality_scores':
          Sequences, Acceptors, Donors, Exons, Ests, Qualities, SplitPos = paths_load_data_solexa('training',None,self.ARGS)
 
-         end = 30
+         end = 50
          Sequences   = Sequences[:end]
          Exons       = Exons[:end]
          Ests        = Ests[:end]
@@ -114,12 +117,12 @@ class QPalma:
       [h,d,a,mmatrix,qualityPlifs] = set_param_palma(param,self.ARGS.train_with_intronlengthinformation)
 
       # delete splicesite-score-information
-      if not self.ARGS.train_with_splicesitescoreinformation:
-         for i in range(len(Acceptors)):
-            if Acceptors[i] > -20:
-               Acceptors[i] = 1
-            if Donors[i] >-20:
-               Donors[i] = 1
+      #if not self.ARGS.train_with_splicesitescoreinformation:
+      #   for i in range(len(Acceptors)):
+      #      if Acceptors[i] > -20:
+      #         Acceptors[i] = 1
+      #      if Donors[i] >-20:
+      #         Donors[i] = 1
 
       # Initialize solver 
       if Configuration.USE_OPT:
@@ -155,6 +158,7 @@ class QPalma:
             break
 
          for exampleIdx in range(self.numExamples):
+            print 'Current example nr %d' % exampleIdx
 
             if (exampleIdx%10) == 0:
                print 'Current example nr %d' % exampleIdx
@@ -176,14 +180,16 @@ class QPalma:
             if exons[-1,1] > len(dna):
                continue
 
+            pdb.set_trace()
             # Berechne die Parameter des wirklichen Alignments (but with untrained d,a,h ...)    
             trueSpliceAlign, trueWeightMatch, trueWeightQuality = computeSpliceAlignWithQuality(dna, exons)
             
-            #pdb.set_trace()
+            print 'trueWeights' 
             # Calculate the weights
             trueWeightDon, trueWeightAcc, trueWeightIntron = computeSpliceWeights(d, a, h, trueSpliceAlign, don_supp, acc_supp)
             trueWeight = numpy.vstack([trueWeightIntron, trueWeightDon, trueWeightAcc, trueWeightMatch, trueWeightQuality])
 
+
             currentPhi[0:donSP]                                               = mat(d.penalties[:]).reshape(donSP,1)
             currentPhi[donSP:donSP+accSP]                                     = mat(a.penalties[:]).reshape(accSP,1)
             currentPhi[donSP+accSP:donSP+accSP+lengthSP]                      = mat(h.penalties[:]).reshape(lengthSP,1)
@@ -319,6 +325,7 @@ class QPalma:
             path_loss   = [0]*(num_path[exampleIdx])
 
             for pathNr in range(num_path[exampleIdx]):
+               print 'decodedWeights' 
                weightDon, weightAcc, weightIntron = computeSpliceWeights(d, a,
                h, newSpliceAlign[pathNr,:].flatten().tolist()[0], don_supp,
                acc_supp,True)
@@ -335,6 +342,8 @@ class QPalma:
                   if newSpliceAlign[pathNr,alignPosIdx] != trueSpliceAlign[alignPosIdx]:
                      path_loss[pathNr] += 1
 
+               #pdb.set_trace()
+
                # Gewichte in restliche Zeilen der Matrix speichern
                wp = numpy.vstack([weightIntron, weightDon, weightAcc, newWeightMatch[pathNr,:].T, decodedQualityFeatures])
                allWeights[:,pathNr+1] = wp