+ fixed some index bugs in the evaluation
author fabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Sat, 19 Apr 2008 01:54:03 +0000 (01:54 +0000)
committer fabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Sat, 19 Apr 2008 01:54:03 +0000 (01:54 +0000)
git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@8698 e1793c9e-67f9-0310-80fc-b846ff1f7b36

scripts/Evaluation.py

index 7fa0363..af9bccb 100644 (file)
@@ -3,12 +3,14 @@
 
 import cPickle
 import sys
-import pydb
 import pdb
 import os
 import os.path
 import math
 
+from qpalma.parsers import *
+
+
 data = None
 
 
@@ -306,6 +308,7 @@ def perform_prediction(current_dir,run_name):
       os.system(cmd)
 
 
+
 def forall_experiments(current_func,tl_dir):
    """
    Given the toplevel directoy this function calls for each subdir the
@@ -337,13 +340,104 @@ def forall_experiments(current_func,tl_dir):
    #createTable(all_results)
 
 
+
+def predict_on(filename,filtered_reads):
+
+   print 'parsing filtered reads..'
+   all_filtered_reads = parse_filtered_reads(filtered_reads)
+   print 'found %d filtered reads' % len(all_filtered_reads)
+
+   allPredictions = cPickle.load(open(filename))
+
+   pos_correct_ctr   = 0
+   pos_incorrect_ctr = 0
+
+   score_correct_ctr = 0
+   score_incorrect_ctr = 0
+
+   total_vmatch_instances_ctr = 0
+
+
+   for current_prediction in allPredictions:
+      id = current_prediction['id']
+      current_ground_truth = all_filtered_reads[id]
+
+      start_pos = current_prediction['start_pos']
+      chr = current_prediction['chr']
+      strand = current_prediction['strand']
+
+      #score = current_prediction['DPScores'].flatten().tolist()[0][0]
+
+      #pdb.set_trace()
+
+      predExons = current_prediction['predExons'] #:newExons, 'dna':dna, 'est':est
+      predExons = [e+start_pos for e in predExons]
+      if len(predExons) == 4:
+         predExons[1] -= 1
+         predExons[3] -= 1
+
+         cut_pos = current_ground_truth['true_cut']
+         p_start = current_ground_truth['p_start']
+         e_stop = current_ground_truth['exon_stop']
+         e_start = current_ground_truth['exon_start']
+         p_stop = current_ground_truth['p_stop']
+
+         true_cut = current_ground_truth['true_cut']
+
+         if p_start == predExons[0] and e_stop == predExons[1] and\
+         e_start == predExons[2] and p_stop == predExons[3]:
+            pos_correct_ctr += 1
+         else:
+            pos_incorrect_ctr += 1
+            #pdb.set_trace()
+
+      elif len(predExons) == 2:
+         predExons[1] -= 1
+
+         cut_pos = current_ground_truth['true_cut']
+         p_start = current_ground_truth['p_start']
+         p_stop = current_ground_truth['p_stop']
+
+         true_cut = current_ground_truth['true_cut']
+
+         if p_start == predExons[0] and p_stop == predExons[1]:
+            pos_correct_ctr += 1
+         else:
+            pos_incorrect_ctr += 1
+            #pdb.set_trace()
+
+      else:
+         pass
+      ## check whether the correct predictions score higher than the incorrect
+      ## ones
+      #cmp_res = compare_scores_and_labels(current_scores,current_labels)
+      #if cmp_res:
+      #   score_correct_ctr += 1
+      #else:
+      #   score_incorrect_ctr += 1
+
+   numPredictions = len(allPredictions)
+
+   # now that we have evaluated all instances put out all counters and sizes
+   print 'Total num. of examples: %d' % numPredictions
+   print 'Correct pos: %2.3f, incorrect pos: %2.3f' %\
+   (pos_correct_ctr/(1.0*numPredictions),pos_incorrect_ctr/(1.0*numPredictions))
+
+   #print 'Correct scores: %d, incorrect scores: %d' %\
+   #(score_correct_ctr,score_incorrect_ctr)
+
+   #pos_error   = 1.0 * pos_incorrect_ctr / true_vmatch_instances_ctr
+   #score_error = 1.0 * score_incorrect_ctr / total_vmatch_instances_ctr
+
+
 if __name__ == '__main__':
-   dir = sys.argv[1]
-   assert os.path.exists(dir), 'Error: Directory does not exist!'
+   #dir = sys.argv[1]
+   #assert os.path.exists(dir), 'Error: Directory does not exist!'
 
    #global data
    #data_fn = '/fml/ag-raetsch/home/fabio/svn/projects/QPalma/scripts/dataset_remapped_test_new'
    #data = cPickle.load(open(data_fn))
-
-   forall_experiments(perform_prediction,dir)
+   #forall_experiments(perform_prediction,dir)
    #forall_experiments(collect_prediction,dir)
+
+   predict_on(sys.argv[1],sys.argv[2])