+ first running version of Evaluation script
[qpalma.git] / scripts / Evaluation.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import cPickle
5 import sys
6 import os
7 import os.path
8
def _mean_offset(offsets):
    """Return the arithmetic mean of ``offsets`` as a float, or -1 if empty."""
    if not offsets:
        # -1 is the "no incorrect positions" sentinel callers already expect.
        return -1
    return sum(offsets) / float(len(offsets))


def evaluatePositions(eBegin, eEnd):
    """Split begin/end offsets into exact hits and misses and average the misses.

    An offset equal to 0 counts as a correct position ("pos"); any other
    value counts as an incorrect one ("neg").

    Parameters:
        eBegin -- sequence of numeric offsets for the exon begin positions
        eEnd   -- sequence of numeric offsets for the exon end positions

    Returns the 6-tuple
        (eBegin_pos, eBegin_neg, eEnd_pos, eEnd_neg,
         mean_eBegin_neg, mean_eEnd_neg)
    where the first four items are lists and the last two are the means of
    the respective "neg" lists (float), or -1 when that list is empty.

    The mean is taken over the signed values, so offsets of opposite sign
    cancel each other out.
    """
    eBegin_pos = [elem for elem in eBegin if elem == 0]
    eBegin_neg = [elem for elem in eBegin if elem != 0]
    eEnd_pos = [elem for elem in eEnd if elem == 0]
    eEnd_neg = [elem for elem in eEnd if elem != 0]

    mean_eBegin_neg = _mean_offset(eBegin_neg)
    mean_eEnd_neg = _mean_offset(eEnd_neg)

    return eBegin_pos, eBegin_neg, eEnd_pos, eEnd_neg, mean_eBegin_neg, mean_eEnd_neg
35
def perform_prediction(current_dir, run_name):
    """Run ./doPrediction.sh on one run directory through the shell.

    The script is invoked relative to the current working directory with
    ``current_dir`` as its only argument. Its stdout is redirected to
    ``<run_name>.out`` and its stderr to ``<run_name>.err``, both created in
    the current working directory. The exit status is ignored and nothing
    is returned.
    """
    # shlex.quote exists on Python 3.3+; Python 2 offers the same function
    # as pipes.quote.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    # Quote every interpolated value so that whitespace or shell
    # metacharacters in a path cannot split or alter the command line.
    # Names made only of safe characters are passed through unchanged.
    #cmd = 'echo ./doPrediction.sh %s | qsub -l h_vmem=1.0G -cwd -j y -N \"%s.log\"'%(current_dir,run_name)
    cmd = './doPrediction.sh %s 1>%s.out 2>%s.err' % (
        quote(current_dir), quote(run_name), quote(run_name))
    os.system(cmd)
41
def forall_experiments(current_func, tl_dir):
    """Apply ``current_func`` to every immediate sub-directory of ``tl_dir``.

    Parameters:
        current_func -- callable invoked as ``current_func(run_dir, run_name)``
                        where ``run_dir`` is ``os.path.join(tl_dir, run_name)``
                        and ``run_name`` is the sub-directory's own name
        tl_dir       -- top-level directory whose sub-directories are visited

    Entries of ``tl_dir`` that are not directories are skipped. Directories
    are visited in the order ``os.listdir`` reports them. Return values of
    ``current_func`` are discarded.
    """
    # Decide which entries are run directories before calling anything, so
    # the set of visited runs does not depend on what current_func does.
    runs = []
    for entry in os.listdir(tl_dir):
        run_dir = os.path.join(tl_dir, entry)
        if os.path.isdir(run_dir):
            # The entry name is the run name; no need to re-split the path.
            runs.append((run_dir, entry))

    for run_dir, run_name in runs:
        current_func(run_dir, run_name)
59
def collect_prediction(current_dir, run_name):
    """Print offset statistics for the TRAIN predictions of one run.

    Unpickles the sequence of predictions stored in
    ``<current_dir>/<run_name>_allPredictions_TRAIN``. Each prediction is
    indexed with the keys 'e1_b_off', 'e1_e_off', 'e2_b_off' and 'e2_e_off'
    (begin/end offsets of exon 1 and exon 2). Predictions whose 'e1_b_off'
    is None are left out of the per-position statistics but still count
    towards the printed total.

    Printed to stdout: the total number of predictions, the number of
    predictions with all four offsets equal to 0, and per position the count
    of correct (offset 0) and incorrect offsets plus the mean of the
    incorrect offsets (-1 when there are none).

    Returns None.
    """
    train_suffix = '_allPredictions_TRAIN'
    filename = os.path.join(current_dir, run_name) + train_suffix

    # NOTE(review): unpickling can execute arbitrary code; only point this
    # at prediction files produced by a trusted run.
    # Binary mode is what pickle expects, and 'with' closes the handle.
    with open(filename, 'rb') as pred_file:
        allTrainPredictions = cPickle.load(pred_file)

    exon1Begin = []
    exon1End = []
    exon2Begin = []
    exon2End = []

    for current_pred in allTrainPredictions:
        e1_b_off = current_pred['e1_b_off']
        e1_e_off = current_pred['e1_e_off']
        e2_b_off = current_pred['e2_b_off']
        e2_e_off = current_pred['e2_e_off']

        # NOTE(review): None presumably marks a prediction without usable
        # exon boundaries -- confirm against the code writing these files.
        if e1_b_off is None:
            continue

        exon1Begin.append(e1_b_off)
        exon1End.append(e1_e_off)
        exon2Begin.append(e2_b_off)
        exon2End.append(e2_e_off)

    e1Begin_pos, e1Begin_neg, e1End_pos, e1End_neg, mean_e1Begin_neg, mean_e1End_neg = \
        evaluatePositions(exon1Begin, exon1End)
    e2Begin_pos, e2Begin_neg, e2End_pos, e2End_neg, mean_e2Begin_neg, mean_e2End_neg = \
        evaluatePositions(exon2Begin, exon2End)

    # A prediction is entirely correct only if all four boundaries have
    # offset 0. The four lists are appended to in lockstep, so zip is safe.
    all_pos_correct = 0
    for offsets in zip(exon1Begin, exon1End, exon2Begin, exon2End):
        if all(off == 0 for off in offsets):
            all_pos_correct += 1

    # Each print gets one pre-formatted string in parentheses, which yields
    # identical output under the Python 2 print statement and the Python 3
    # print function.
    print('Total num. of examples: %d' % len(allTrainPredictions))
    print('Number of total correct examples: %d' % all_pos_correct)
    print('Correct positions:\t\t%d\t%d\t%d\t%d' % (
        len(e1Begin_pos), len(e1End_pos), len(e2Begin_pos), len(e2End_pos)))
    print('Incorrect positions:\t\t%d\t%d\t%d\t%d' % (
        len(e1Begin_neg), len(e1End_neg), len(e2Begin_neg), len(e2End_neg)))
    print('Mean of pos. offset:\t\t%.2f\t%.2f\t%.2f\t%.2f' % (
        mean_e1Begin_neg, mean_e1End_neg, mean_e2Begin_neg, mean_e2End_neg))

    # TODO: the matching '<run_name>_allPredictions_TEST' file is not
    # evaluated yet.
    # NOTE(review): a commented-out reference in the original suggested each
    # prediction also carries 'predExons', 'trueExons', 'dna', 'est' and
    # 'DPScores'; none of these are read here -- verify before relying on it.
124
125
if __name__ == '__main__':
    # Usage: Evaluation.py <toplevel_dir>
    # <toplevel_dir> holds one sub-directory per run.
    if len(sys.argv) < 2:
        sys.exit('Usage: %s <toplevel_dir>' % sys.argv[0])

    toplevel_dir = sys.argv[1]
    # Explicit check instead of assert: asserts are stripped under -O, and
    # the path must be a directory because its entries are listed next.
    if not os.path.isdir(toplevel_dir):
        sys.exit('Error directory does not exist!')

    # Enable the next line to (re)generate the predictions before collecting.
    #forall_experiments(perform_prediction, toplevel_dir)
    forall_experiments(collect_prediction, toplevel_dir)