+ take into account multiple hits per unique read
authorfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Wed, 14 May 2008 16:23:07 +0000 (16:23 +0000)
committerfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Wed, 14 May 2008 16:23:07 +0000 (16:23 +0000)
+ change prediction file name to include current set_flag

git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@9012 e1793c9e-67f9-0310-80fc-b846ff1f7b36

scripts/qpalma_main.py

index 2dfc753..8af76e2 100644 (file)
@@ -246,6 +246,7 @@ class QPalma:
       os.chdir(full_working_path)
 
       self.logfh = open('_qpalma_train.log','w+')
+      cPickle.dump(run,open('run_obj.pickle','w+'))
 
       self.plog("Settings are:\n")
       self.plog("%s\n"%str(run))
@@ -586,7 +587,7 @@ class QPalma:
 #
 ###############################################################################
 
-   def predict(self,run,prediction_set,param):
+   def predict(self,run,dataset_fn,prediction_keys,param,set_name):
       """
       Performing a prediction takes...
       """
@@ -594,6 +595,8 @@ class QPalma:
 
       full_working_path = os.path.join(run['alignment_dir'],run['name'])
 
+      print 'full_working_path is %s' % full_working_path 
+
       #assert not os.path.exists(full_working_path)
       if not os.path.exists(full_working_path):
          os.mkdir(full_working_path)
@@ -603,7 +606,7 @@ class QPalma:
       # ATTENTION: Changing working directory
       os.chdir(full_working_path)
 
-      self.logfh = open('_qpalma_train.log','w+')
+      self.logfh = open('_qpalma_predict_%s.log'%set_name,'w+')
 
       if self.run['mode'] == 'normal':
          self.use_quality_scores = False
@@ -614,7 +617,17 @@ class QPalma:
          assert(False)
 
       # number of prediction instances
-      self.plog('Number of prediction examples: %d\n'% len(prediction_set))
+      self.plog('Number of prediction examples: %d\n'% len(prediction_keys))
+
+      # load dataset and fetch instances that shall be predicted
+      dataset = cPickle.load(open(dataset_fn))
+
+      prediction_set = {}
+      for key in prediction_keys:
+         prediction_set[key] = dataset[key]
+
+      # we do not need the full dataset anymore
+      del dataset
 
       # Set the parameters such as limits penalties for the Plifs
       [h,d,a,mmatrix,qualityPlifs] =\
@@ -643,58 +656,62 @@ class QPalma:
       for example_key in prediction_set.keys():
          print 'Current example %d' % example_key
 
-         currentSeqInfo,original_est,currentQualities = prediction_set[id]
+         for example in prediction_set[example_key]:
 
-         id,chr,strand,genomicSeq_start,genomicSeq_stop =\
-         currentSeqInfo 
+            currentSeqInfo,original_est,currentQualities = example
 
-         if not chr in range(1,6):
-            continue
+            id,chr,strand,genomicSeq_start,genomicSeq_stop =\
+            currentSeqInfo 
 
-         self.plog('Loading example nr. %d (id: %d)...\n'%(exampleIdx,int(id)))
+            assert id == example_key
 
-         est = original_est
-         est = unbracket_est(est)
+            if not chr in range(1,6):
+               continue
 
-         if run['mode'] == 'normal':
-            quality = [40]*len(est)
+            self.plog('Loading example id: %d...\n'% int(id))
 
-         if run['mode'] == 'using_quality_scores':
-            quality = currentQualities[0]
+            est = original_est
+            est = unbracket_est(est)
 
-         if not run['enable_quality_scores']:
-            quality = [40]*len(est)
+            if run['mode'] == 'normal':
+               quality = [40]*len(est)
 
-         current_example_predictions = []
+            if run['mode'] == 'using_quality_scores':
+               quality = currentQualities[0]
 
-         try:
-            currentDNASeq, currentAcc, currentDon = get_seq_and_scores(chr,strand,genomicSeq_start,genomicSeq_stop,run['dna_flat_files'])
-         except:
-            problem_ctr += 1
-            continue
+            if not run['enable_quality_scores']:
+               quality = [40]*len(est)
+
+            current_example_predictions = []
 
-         if not run['enable_splice_signals']:
-            for idx,elem in enumerate(currentDon):
-               if elem != -inf:
-                  currentDon[idx] = 0.0
+            try:
+               currentDNASeq, currentAcc, currentDon = get_seq_and_scores(chr,strand,genomicSeq_start,genomicSeq_stop,run['dna_flat_files'])
+            except:
+               problem_ctr += 1
+               continue
 
-            for idx,elem in enumerate(currentAcc):
-               if elem != -inf:
-                  currentAcc[idx] = 0.0
+            if not run['enable_splice_signals']:
+               for idx,elem in enumerate(currentDon):
+                  if elem != -inf:
+                     currentDon[idx] = 0.0
+
+               for idx,elem in enumerate(currentAcc):
+                  if elem != -inf:
+                     currentAcc[idx] = 0.0
 
-         current_prediction = self.calc_alignment(currentDNASeq, est,\
-         quality, currentDon, currentAcc, d, a, h, mmatrix, qualityPlifs)
+            current_prediction = self.calc_alignment(currentDNASeq, est,\
+            quality, currentDon, currentAcc, d, a, h, mmatrix, qualityPlifs)
 
-         current_prediction['id'] = id
-         #current_prediction['start_pos'] = up_cut
-         current_prediction['start_pos'] = genomicSeq_start
-         current_prediction['chr'] = chr
-         current_prediction['strand'] = strand
+            current_prediction['id'] = id
+            #current_prediction['start_pos'] = up_cut
+            current_prediction['start_pos'] = genomicSeq_start
+            current_prediction['chr'] = chr
+            current_prediction['strand'] = strand
 
-         allPredictions.append(current_prediction)
+            allPredictions.append(current_prediction)
 
       # end of the prediction loop we save all predictions in a pickle file and exit
-      cPickle.dump(allPredictions,open('%s_allPredictions_%s'%(run['name'],set_flag),'w+'))
+      cPickle.dump(allPredictions,open('%s.predictions.pickle'%(set_name),'w+'))
       print 'Prediction completed'
       print 'Problem ctr %d' % problem_ctr
       self.logfh.close()