import cPickle
import sys
-import pydb
import pdb
import os
import os.path
import math
+from qpalma.parsers import *
+
+
# Module-level cache for the loaded dataset (populated elsewhere, see the
# commented-out cPickle.load in __main__).
data = None
# NOTE(review): `cmd` is not defined anywhere in this chunk -- as written this
# raises NameError at import time. Confirm where `cmd` was meant to come from
# (looks like a leftover from an earlier edit) and either define or remove it.
os.system(cmd)
+
def forall_experiments(current_func,tl_dir):
"""
   Given the toplevel directory this function calls for each subdir the
#createTable(all_results)
+
+def predict_on(filename,filtered_reads):
+
+ print 'parsing filtered reads..'
+ all_filtered_reads = parse_filtered_reads(filtered_reads)
+ print 'found %d filtered reads' % len(all_filtered_reads)
+
+ allPredictions = cPickle.load(open(filename))
+
+ pos_correct_ctr = 0
+ pos_incorrect_ctr = 0
+
+ score_correct_ctr = 0
+ score_incorrect_ctr = 0
+
+ total_vmatch_instances_ctr = 0
+
+
+ for current_prediction in allPredictions:
+ id = current_prediction['id']
+ current_ground_truth = all_filtered_reads[id]
+
+ start_pos = current_prediction['start_pos']
+ chr = current_prediction['chr']
+ strand = current_prediction['strand']
+
+ #score = current_prediction['DPScores'].flatten().tolist()[0][0]
+
+ #pdb.set_trace()
+
+ predExons = current_prediction['predExons'] #:newExons, 'dna':dna, 'est':est
+ predExons = [e+start_pos for e in predExons]
+ if len(predExons) == 4:
+ predExons[1] -= 1
+ predExons[3] -= 1
+
+ cut_pos = current_ground_truth['true_cut']
+ p_start = current_ground_truth['p_start']
+ e_stop = current_ground_truth['exon_stop']
+ e_start = current_ground_truth['exon_start']
+ p_stop = current_ground_truth['p_stop']
+
+ true_cut = current_ground_truth['true_cut']
+
+ if p_start == predExons[0] and e_stop == predExons[1] and\
+ e_start == predExons[2] and p_stop == predExons[3]:
+ pos_correct_ctr += 1
+ else:
+ pos_incorrect_ctr += 1
+ #pdb.set_trace()
+
+ elif len(predExons) == 2:
+ predExons[1] -= 1
+
+ cut_pos = current_ground_truth['true_cut']
+ p_start = current_ground_truth['p_start']
+ p_stop = current_ground_truth['p_stop']
+
+ true_cut = current_ground_truth['true_cut']
+
+ if p_start == predExons[0] and p_stop == predExons[1]:
+ pos_correct_ctr += 1
+ else:
+ pos_incorrect_ctr += 1
+ #pdb.set_trace()
+
+ else:
+ pass
+ ## check whether the correct predictions score higher than the incorrect
+ ## ones
+ #cmp_res = compare_scores_and_labels(current_scores,current_labels)
+ #if cmp_res:
+ # score_correct_ctr += 1
+ #else:
+ # score_incorrect_ctr += 1
+
+ numPredictions = len(allPredictions)
+
+ # now that we have evaluated all instances put out all counters and sizes
+ print 'Total num. of examples: %d' % numPredictions
+ print 'Correct pos: %2.3f, incorrect pos: %2.3f' %\
+ (pos_correct_ctr/(1.0*numPredictions),pos_incorrect_ctr/(1.0*numPredictions))
+
+ #print 'Correct scores: %d, incorrect scores: %d' %\
+ #(score_correct_ctr,score_incorrect_ctr)
+
+ #pos_error = 1.0 * pos_incorrect_ctr / true_vmatch_instances_ctr
+ #score_error = 1.0 * score_incorrect_ctr / total_vmatch_instances_ctr
+
+
if __name__ == '__main__':
   # Fail with a usage message instead of an IndexError when the two
   # required arguments are missing.
   if len(sys.argv) != 3:
      sys.stderr.write('Usage: %s <predictions_pickle> <filtered_reads>\n' % sys.argv[0])
      sys.exit(1)

   predict_on(sys.argv[1],sys.argv[2])