+ extended scripts
[qpalma.git] / scripts / Evaluation.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import cPickle
5 import sys
6 import pydb
7 import pdb
8 import os
9 import os.path
10
def evaluatePositions(eBegin,eEnd):
    """
    Split begin/end offsets into hits and misses and average the misses.

    An offset equal to 0 counts as a hit, anything else as a miss.

    Parameters:
        eBegin -- sequence of numeric offsets for the exon begin
        eEnd   -- sequence of numeric offsets for the exon end

    Returns the 6-tuple
        (eBegin_pos, eBegin_neg, eEnd_pos, eEnd_neg,
         mean_eBegin_neg, mean_eEnd_neg)
    where the *_pos lists hold the zero offsets, the *_neg lists hold the
    non-zero offsets, and the two means are the signed arithmetic means of
    the respective *_neg lists as floats. A mean is -1 when its list is
    empty; since the mean is signed, offsets of opposite sign cancel out and
    a genuine mean of -1.0 cannot be told apart from the "empty" marker.
    """
    eBegin_pos = [elem for elem in eBegin if elem == 0]
    eBegin_neg = [elem for elem in eBegin if elem != 0]
    eEnd_pos = [elem for elem in eEnd if elem == 0]
    eEnd_neg = [elem for elem in eEnd if elem != 0]

    mean_eBegin_neg = _mean_or_minus_one(eBegin_neg)
    mean_eEnd_neg = _mean_or_minus_one(eEnd_neg)

    return eBegin_pos,eBegin_neg,eEnd_pos,eEnd_neg,mean_eBegin_neg,mean_eEnd_neg


def _mean_or_minus_one(values):
    """Return the float mean of values, or -1 when values is empty."""
    # Explicit emptiness test instead of catching every exception raised by
    # the division: only the "no misses" case is meant to yield the marker.
    if not values:
        return -1
    return sum(values) / float(len(values))
36
37
def perform_prediction(current_dir,run_name):
    """
    Run ./doPrediction.sh on one experiment directory and capture its output.

    The script is started with current_dir as its only argument. Its stdout
    is written to '<run_name>.out' and its stderr to '<run_name>.err'; both
    files are created (or truncated) relative to the current working
    directory. The exit status of the script is not inspected.

    The argument is passed as a list entry rather than pasted into a shell
    command line, so directory names containing spaces or shell
    metacharacters reach the script unchanged.
    """
    # Local import: keeps this function usable without touching the module header.
    import subprocess

    # Cluster submission variant, kept for reference:
    #cmd = 'echo ./doPrediction.sh %s | qsub -l h_vmem=1.0G -cwd -j y -N \"%s.log\"'%(current_dir,run_name)

    with open('%s.out' % run_name, 'w') as out_fh:
        with open('%s.err' % run_name, 'w') as err_fh:
            try:
                subprocess.call(['./doPrediction.sh', current_dir],
                                stdout=out_fh, stderr=err_fh)
            except OSError as exc:
                # The shell-based call never raised when the script was
                # missing or not executable; stay best-effort but leave a
                # trace in the error log.
                err_fh.write('could not run ./doPrediction.sh: %s\n' % exc)
43
44
def forall_experiments(current_func,tl_dir):
    """
    Call current_func once for every immediate subdirectory of tl_dir.

    Parameters:
        current_func -- callable invoked as current_func(current_dir, run_name),
                        where current_dir is the subdirectory joined onto
                        tl_dir and run_name is the bare directory name
        tl_dir       -- toplevel directory holding one subdirectory per run

    Entries of tl_dir that are not directories are skipped. Subdirectories
    are visited in the order returned by os.listdir and are not descended
    into. The return value of current_func is ignored.
    """
    # Collect the run directories up front so that anything current_func
    # creates inside tl_dir does not influence which runs are visited.
    run_names = [entry for entry in os.listdir(tl_dir)
                 if os.path.isdir(os.path.join(tl_dir, entry))]

    for run_name in run_names:
        # The entry name itself is the run name; no need to split the joined
        # path on a hard-coded '/' again.
        current_func(os.path.join(tl_dir, run_name), run_name)
58
59
def collect_prediction(current_dir,run_name):
    """
    Report prediction statistics for the training and the test set of one run.

    For each of the two files
        <current_dir>/<run_name>_allPredictions_TRAIN
        <current_dir>/<run_name>_allPredictions_TEST
    (in that order) the file name is announced on stdout and the file is then
    handed to prediction_on.
    """
    base_path = os.path.join(current_dir, run_name)

    for suffix in ('_allPredictions_TRAIN', '_allPredictions_TEST'):
        filename = base_path + suffix
        print('Prediction on: %s' % filename)
        prediction_on(filename)
76
77
def prediction_on(filename):
    """
    Print position accuracy statistics for one pickled prediction file.

    The file must unpickle to a sequence of examples, each of which is a
    sequence of prediction dicts. Keys read from every prediction:
    'e1_b_off', 'e1_e_off', 'e2_b_off', 'e2_e_off' (begin/end offsets of the
    two exons; 0 is counted as a correct position, and a prediction whose
    'e1_b_off' is None is left out of the statistics). Keys read only from
    predictions after the first one of an example: 'label', 'predExons',
    'trueExons' and 'DPScores'.

    Nothing is returned; diagnostics and the summary go to stdout.

    NOTE(review): this function was reconstructed from a listing that had
    lost its indentation. The nesting of the diagnostic branch (in particular
    where the counter is printed) should be confirmed against the repository
    copy.
    """
    # NOTE: unpickling can execute arbitrary code -- only use this on
    # prediction files written by trusted runs.
    # Binary mode plus a context manager: the handle is closed again and the
    # load does not depend on text-mode newline handling.
    with open(filename, 'rb') as pickle_fh:
        allPredictions = cPickle.load(pickle_fh)

    exon1Begin = []
    exon1End = []
    exon2Begin = []
    exon2End = []

    # Number of non-first predictions that are flagged by the label test
    # below and score lower than the first prediction of their example.
    ctr = 0

    for current_example_pred in allPredictions:
        for elem_nr,current_pred in enumerate(current_example_pred):
            if elem_nr > 0:
                label = current_pred['label']
                # Explicit comparisons kept on purpose: the label type is not
                # visible here, and `not label` would behave differently for
                # labels that are not plain booleans.
                if label == False or (label == True
                        and len(current_pred['predExons']) != 4):
                    this_score = _first_dp_score(current_pred)
                    first_score = _first_dp_score(current_example_pred[0])
                    if this_score < first_score:
                        true_exons = current_pred['trueExons']
                        print('%s %s %s' % (
                            true_exons[0,1]-true_exons[0,0],
                            true_exons[1,1]-true_exons[1,0],
                            current_pred['predExons']))
                        print('%s %s' % (this_score, first_score))
                        ctr += 1
                        print(ctr)

            e1_b_off = current_pred['e1_b_off']
            if e1_b_off is not None:
                exon1Begin.append(e1_b_off)
                exon1End.append(current_pred['e1_e_off'])
                exon2Begin.append(current_pred['e2_b_off'])
                exon2End.append(current_pred['e2_e_off'])

    # Computed once over the complete lists; this also keeps the summary
    # below well-defined when the file contains no examples at all.
    e1Begin_pos,e1Begin_neg,e1End_pos,e1End_neg,mean_e1Begin_neg,mean_e1End_neg = evaluatePositions(exon1Begin,exon1End)
    e2Begin_pos,e2Begin_neg,e2End_pos,e2End_neg,mean_e2Begin_neg,mean_e2End_neg = evaluatePositions(exon2Begin,exon2End)

    # A prediction is fully correct when all four offsets are zero.
    all_pos_correct = 0
    for offsets in zip(exon1Begin, exon1End, exon2Begin, exon2End):
        if all(off == 0 for off in offsets):
            all_pos_correct += 1

    print('Total num. of examples: %d' % len(allPredictions))
    print('Number of total correct examples: %d' % all_pos_correct)
    print('Correct positions:\t\t%d\t%d\t%d\t%d' % (len(e1Begin_pos),len(e1End_pos),len(e2Begin_pos),len(e2End_pos)))
    print('Incorrect positions:\t\t%d\t%d\t%d\t%d' % (len(e1Begin_neg),len(e1End_neg),len(e2Begin_neg),len(e2End_neg)))
    print('Mean of pos. offset:\t\t%.2f\t%.2f\t%.2f\t%.2f' % (mean_e1Begin_neg,mean_e1End_neg,mean_e2Begin_neg,mean_e2End_neg))


def _first_dp_score(pred):
    """Return element [0][0] of the flattened 'DPScores' entry of pred."""
    # presumably a 2-D numpy matrix, whose flatten() stays 2-D -- TODO confirm
    return pred['DPScores'].flatten().tolist()[0][0]
147
148
if __name__ == '__main__':
    # Usage: Evaluation.py <toplevel_dir>
    # <toplevel_dir> holds one subdirectory per experiment run.
    if len(sys.argv) < 2:
        sys.exit('Usage: %s <toplevel_dir>' % sys.argv[0])

    toplevel_dir = sys.argv[1]
    # Explicit check instead of assert: asserts are stripped under -O, and a
    # plain file would otherwise only fail later inside os.listdir.
    if not os.path.isdir(toplevel_dir):
        sys.exit('Error directory does not exist!')

    # Swap in perform_prediction to (re)run the predictions before collecting:
    #forall_experiments(perform_prediction,toplevel_dir)
    forall_experiments(collect_prediction,toplevel_dir)