+ removed obsolete code
author fabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Sun, 11 May 2008 19:04:09 +0000 (19:04 +0000)
committer fabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Sun, 11 May 2008 19:04:09 +0000 (19:04 +0000)
+ changed dataset loading/processing

git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@8979 e1793c9e-67f9-0310-80fc-b846ff1f7b36

scripts/qpalma_main.py

index 8b52328..0e8814a 100644 (file)
@@ -73,7 +73,7 @@ def unbracket_est(est):
 
 def getData(SeqInfo,OriginalEsts,Exons,exampleIdx,run):
    currentSeqInfo = SeqInfo[exampleIdx]
-   id,chr,strand,up_cut,down_cut,true_cut = currentSeqInfo
+   id,chr,strand,up_cut,down_cut = currentSeqInfo
 
    est = OriginalEsts[exampleIdx] 
    est = "".join(est)
@@ -265,8 +265,12 @@ class QPalma:
 
       data_filename = self.run['dataset_filename']
 
-      SeqInfo, Exons, OriginalEsts, Qualities,\
-      AlternativeSequences = paths_load_data(data_filename)
+      dataset = cPickle.load(open(data_filename))
+
+      SeqInfo, OriginalEsts, Qualities  = dataset
+
+      #SeqInfo, Exons, OriginalEsts, Qualities,\
+      #AlternativeSequences = paths_load_data(data_filename)
 
       # Load the whole dataset 
       if self.run['mode'] == 'normal':
@@ -278,7 +282,7 @@ class QPalma:
          assert(False)
 
       self.SeqInfo     = SeqInfo
-      self.Exons       = Exons
+      #self.Exons       = Exons
       self.OriginalEsts= OriginalEsts
       self.Qualities   = Qualities
 
@@ -293,7 +297,7 @@ class QPalma:
       Qualities   = Qualities[beg:end]
 
       # number of training instances
-      N = numExamples = len(SeqInfo) 
+      N = numExamples = len(SeqInfo)
       assert len(Exons) == N and len(OriginalEsts) == N and len(Qualities) == N,\
       'The Exons,Acc,Don,.. arrays are of different lengths'
 
@@ -677,7 +681,6 @@ class QPalma:
       Performing a prediction takes...
 
       """
-
       run = self.run
 
       if self.run['mode'] == 'normal':
@@ -689,24 +692,22 @@ class QPalma:
          assert(False)
 
       SeqInfo     = self.SeqInfo[beg:end]
-      #Exons       = self.Exons[beg:end]
       OriginalEsts= self.OriginalEsts[beg:end]
       Qualities   = self.Qualities[beg:end]
-      #AlternativeSequences = self.AlternativeSequences[beg:end]
 
       # number of training instances
       N = numExamples = len(SeqInfo) 
-      assert len(OriginalEsts) == N\
-      and len(Qualities) == N, 'The Exons,Acc,Don,.. arrays are of different lengths'
+      assert len(OriginalEsts) == N and len(Qualities) == N,\
+      'The Exons,Acc,Don,.. arrays are of different lengths'
 
       self.plog('Number of training examples: %d\n'% numExamples)
 
-      self.noImprovementCtr = 0
-      self.oldObjValue = 1e8
+      #self.noImprovementCtr = 0
+      #self.oldObjValue = 1e8
 
-      remove_duplicate_scores = self.run['remove_duplicate_scores']
-      print_matrix            = self.run['print_matrix']
-      anzpath                 = self.run['anzpath']
+      #remove_duplicate_scores = self.run['remove_duplicate_scores']
+      #print_matrix            = self.run['print_matrix']
+      #anzpath                 = self.run['anzpath']
 
       param = cPickle.load(open(param_filename))
 
@@ -735,18 +736,14 @@ class QPalma:
 
       # beginning of the prediction loop
       for exampleIdx in range(numExamples):
-         self.plog('Loading example nr. %d...\n'%exampleIdx)
 
          currentSeqInfo = SeqInfo[exampleIdx]
-         #chr,strand,up_cut,down_cut = currentSeqInfo 
-
-         #id,chr,strand,genomicSeq_start,genomicSeq_stop =\
-         #currentSeqInfo 
 
-         # just for debugging
          id,chr,strand,genomicSeq_start,genomicSeq_stop =\
          currentSeqInfo 
 
+         self.plog('Loading example nr. %d (id: %d)...\n'%(exampleIdx,int(id)))
+
          est = OriginalEsts[exampleIdx]
          est = unbracket_est(est)
 
@@ -759,12 +756,10 @@ class QPalma:
          if not run['enable_quality_scores']:
             quality = [40]*len(est)
 
-         #currentAlternatives = AlternativeSequences[exampleIdx]
          current_example_predictions = []
 
          # then make predictions for all dna fragments that where occurring in
          # the vmatch results
-         #chr, strand, genomicSeq_start, genomicSeq_stop = alternative_alignment
 
          if not chr in range(1,6):
             continue
@@ -811,6 +806,12 @@ class QPalma:
 
       dna = str(dna)
       est = str(est)
+
+      if '-' in est:
+         self.plog('found gap\n')
+         est = est.replace('-','')
+         assert len(est) == 36
+
       dna_len = len(dna)
       est_len = len(est)