# [qpalma.git] / scripts / Evaluation.py  (gitweb header residue: "+ training works,")
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import cPickle
5 import sys
6 import pydb
7 import pdb
8 import os
9 import os.path
10 import math
11
12
def evaluatePositions(eBegin,eEnd):
   """Split begin/end offset lists into exact hits and misses.

   An offset of 0 means the predicted position matched the true one
   exactly; any other value is the (absolute) distance of a miss.

   Returns a 6-tuple:
      (eBegin_pos, eBegin_neg, eEnd_pos, eEnd_neg,
       mean_eBegin_neg, mean_eEnd_neg)
   where each mean is the average miss distance, or -1 when there were
   no misses on that side.
   """
   eBegin_pos = [elem for elem in eBegin if elem == 0]
   eBegin_neg = [elem for elem in eBegin if elem != 0]
   eEnd_pos = [elem for elem in eEnd if elem == 0]
   eEnd_neg = [elem for elem in eEnd if elem != 0]

   # sum()/len() instead of a manual accumulation loop; guard the empty
   # list explicitly instead of relying on a bare except around the
   # division (which silently swallowed every error type).
   if eBegin_neg:
      mean_eBegin_neg = sum(eBegin_neg) / (1.0*len(eBegin_neg))
   else:
      mean_eBegin_neg = -1

   if eEnd_neg:
      mean_eEnd_neg = sum(eEnd_neg) / (1.0*len(eEnd_neg))
   else:
      mean_eEnd_neg = -1

   return eBegin_pos,eBegin_neg,eEnd_pos,eEnd_neg,mean_eBegin_neg,mean_eEnd_neg
38
39
def perform_prediction(current_dir,run_name):
   """Submit the prediction job for one run directory to the SGE queue.

   Pipes the doPrediction.sh invocation into qsub (12G vmem, current
   working dir, merged stdout/stderr, log named after the run).
   """
   qsub_cmd = 'echo /fml/ag-raetsch/home/fabio/svn/projects/QPalma/scripts/doPrediction.sh %s | qsub -l h_vmem=12.0G -cwd -j y -N \"%s.log\"'%(current_dir,run_name)
   os.system(qsub_cmd)
45
def forall_experiments(current_func,tl_dir):
   """
   Apply current_func(run_dir, run_name) to every subdirectory of the
   given toplevel directory; non-directory entries are skipped.
   """
   for entry in os.listdir(tl_dir):
      candidate = os.path.join(tl_dir,entry)
      if not os.path.isdir(candidate):
         continue
      # run name is the last path component of the run directory
      current_func(candidate,candidate.split('/')[-1])
59
60
61 def collect_prediction(current_dir,run_name):
62 """
63 Given the toplevel directoy this function calls for each subdir the
64 """
65
66 train_suffix = '_allPredictions_TRAIN'
67 test_suffix = '_allPredictions_TEST'
68 jp = os.path.join
69
70 filename = jp(current_dir,run_name)+train_suffix
71 print 'Prediction on: %s' % filename
72 prediction_on(filename)
73
74 filename = jp(current_dir,run_name)+test_suffix
75 print 'Prediction on: %s' % filename
76 prediction_on(filename)
77
78
79 def prediction_on(filename):
80 allPredictions = cPickle.load(open(filename))
81
82 exon1Begin = []
83 exon1End = []
84 exon2Begin = []
85 exon2End = []
86 allWrongExons = []
87 allDoubleScores = []
88
89 pos_correct_ctr = 0
90 pos_incorrect_ctr = 0
91 score_correct_ctr = 0
92 score_incorrect_ctr = 0
93
94 for current_example_pred in allPredictions:
95 gt_example = current_example_pred[0]
96 gt_score = gt_example['DPScores'].flatten().tolist()[0][0]
97 gt_correct = evaluate_unmapped_example(gt_example)
98
99 for elem_nr,current_pred in enumerate(current_example_pred[1:]):
100
101 current_label,comparison_result,current_score = evaluate_example(current_pred)
102
103 if current_label:
104 if comparison_result:
105 pos_correct_ctr += 1
106 else:
107 pos_incorrect_ctr += 1
108 else:
109 if gt_correct and current_label == False and gt_score >= current_score:
110 score_correct_ctr += 1
111 elif gt_correct and current_label == False and not (gt_score >= current_score):
112 score_incorrect_ctr += 1
113
114 print 'Total num. of examples: %d' % len(allPredictions)
115 print 'Number of correct examples: %d' % pos_correct_ctr
116 print 'Number of incorrect examples: %d' % pos_incorrect_ctr
117 print 'Number of correct scores : %d' % score_correct_ctr
118 print 'Number of incorrect scores: %d' % score_incorrect_ctr
119
120 #print 'Correct positions:\t\t%d\t%d\t%d\t%d' % (len(e1Begin_pos),len(e1End_pos),len(e2Begin_pos),len(e2End_pos))
121 #print 'Incorrect positions:\t\t%d\t%d\t%d\t%d' % (len(e1Begin_neg),len(e1End_neg),len(e2Begin_neg),len(e2End_neg))
122 #print 'Mean of pos. offset:\t\t%.2f\t%.2f\t%.2f\t%.2f' % (mean_e1Begin_neg,mean_e1End_neg,mean_e2Begin_neg,mean_e2End_neg)
123
def evaluate_unmapped_example(current_prediction):
   """Return True iff predicted exon boundaries equal the true ones exactly."""
   return compare_exons(current_prediction['predExons'],
                        current_prediction['trueExons'])
130
131
def evaluate_example(current_prediction):
   """Evaluate one alternative prediction against the ground truth.

   current_prediction is a dict with keys 'predExons', 'trueExons'
   (a matrix whose flattened first row holds the four true boundaries),
   'start_pos', 'alternative_start_pos', 'DPScores' and optionally
   'label'.

   Returns (label, positions_match, dp_score).
   """
   # dict.get instead of a bare try/except that swallowed every error
   label = current_prediction.get('label', False)

   predExons = current_prediction['predExons']
   trueExons = current_prediction['trueExons']

   # shift both coordinate lists into absolute genome positions
   predPositions = [elem + current_prediction['alternative_start_pos'] for elem in predExons]
   truePositions = [elem + current_prediction['start_pos'] for elem in trueExons.flatten().tolist()[0]]

   pred_score = current_prediction['DPScores'].flatten().tolist()[0][0]

   # A prediction also counts as correct when the splice-site boundaries
   # are off by one in these specific patterns (consolidates three
   # copy-pasted alternative-position blocks).
   candidates = [predPositions]
   if len(predPositions) == 4:
      p0,p1,p2,p3 = predPositions
      candidates.append([p0, p1+1, p2+1, p3])
      candidates.append([p0, p1,   p2+1, p3])
      candidates.append([p0, p1,   p2-1, p3])

   pos_comparison = truePositions in candidates

   # NOTE(review): removed a leftover pdb.set_trace() debug breakpoint
   # that halted execution whenever a positive example mismatched.

   return label,pos_comparison,pred_score
176
def compare_exons(predExons,trueExons):
   """Return True iff the four predicted boundaries match trueExons.

   trueExons is a 2x2 matrix [[e1_begin, e1_end], [e2_begin, e2_end]].
   A boundary matches when int(math.fabs(diff)) is 0, i.e. offsets
   strictly below 1 are tolerated (matters for float positions).
   """
   if len(predExons) != 4:
      return False

   expected = (trueExons[0,0], trueExons[0,1], trueExons[1,0], trueExons[1,1])
   for predicted_pos, true_pos in zip(predExons, expected):
      if int(math.fabs(predicted_pos - true_pos)) != 0:
         return False

   return True
197
198
def evaluateExample(dna,est,exons,SpliceAlign,newEstAlign):
   """Derive exon boundaries from a splice alignment and compare them.

   SpliceAlign is a matrix whose flattened first row marks exonic
   positions with 0; exon starts/ends are the 0/non-0 transitions.
   Returns (e1_b_off, e1_e_off, e2_b_off, e2_e_off, newExons), or
   (None, None, None, None, newExons) when not exactly two exons were
   found.  dna, est and newEstAlign are accepted for interface
   compatibility but unused here.
   """
   alignment = SpliceAlign.flatten().tolist()[0]
   alignment.append(-1)   # sentinel so a trailing exon is closed

   newExons = []
   prev = -1
   for pos,elem in enumerate(alignment):
      if pos > 0:
         prev = alignment[pos-1]
      if prev != 0 and elem == 0:   # exon starts here
         newExons.append(pos)
      if prev == 0 and elem != 0:   # exon ended just before here
         newExons.append(pos)

   if len(newExons) != 4:
      return None,None,None,None,newExons

   e1_begin,e1_end,e2_begin,e2_end = newExons

   e1_b_off = int(math.fabs(e1_begin - exons[0,0]))
   e1_e_off = int(math.fabs(e1_end - exons[0,1]))
   e2_b_off = int(math.fabs(e2_begin - exons[1,0]))
   e2_e_off = int(math.fabs(e2_end - exons[1,1]))

   return e1_b_off,e1_e_off,e2_b_off,e2_e_off,newExons
231
232
if __name__ == '__main__':
   # Usage: Evaluation.py <toplevel_run_dir>
   # renamed `dir` (shadowed the builtin); explicit check + sys.exit
   # instead of assert, which is stripped under `python -O`.
   top_dir = sys.argv[1]
   if not os.path.exists(top_dir):
      sys.exit('Error directory does not exist!')

   forall_experiments(perform_prediction,top_dir)
   #forall_experiments(collect_prediction,top_dir)
239
240 """
241 if elem_nr > 0:
242 #print 'start positions'
243 #print current_pred['start_pos'], current_pred['alternative_start_pos']
244
245 if current_pred['label'] == False or (current_pred['label'] == True
246 and len(current_pred['predExons']) != 4):
247 if
248 current_example_pred[0]['DPScores'].flatten().tolist()[0][0]:
249 print current_pred['trueExons'][0,1]-current_pred['trueExons'][0,0],\
250 current_pred['trueExons'][1,1] - current_pred['trueExons'][1,0],\
251 current_pred['predExons']
252 print current_pred['DPScores'].flatten().tolist()[0][0],\
253 current_example_pred[0]['DPScores'].flatten().tolist()[0][0]
254 ctr += 1
255 print ctr
256
257 if e1_b_off != None:
258 exon1Begin.append(e1_b_off)
259 exon1End.append(e1_e_off)
260 exon2Begin.append(e2_b_off)
261 exon2End.append(e2_e_off)
262 else:
263 pass
264 #allWrongExons.append((newExons,exons))
265
266 if ambigous_match == True:
267 current_score = current_pred['DPScores'][0]
268 example_scores.append(current_score)
269
270 e1Begin_pos,e1Begin_neg,e1End_pos,e1End_neg,mean_e1Begin_neg,mean_e1End_neg = evaluatePositions(exon1Begin,exon1End)
271 e2Begin_pos,e2Begin_neg,e2End_pos,e2End_neg,mean_e2Begin_neg,mean_e2End_neg = evaluatePositions(exon2Begin,exon2End)
272
273 allDoubleScores.append(example_scores)
274 """