+ performed some further refactoring
[qpalma.git] / scripts / Evaluation.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import cPickle
5 import sys
6 import os
7 import os.path
8
def perform_prediction(tl_dir):
    """
    Run ./doPrediction.sh once for every sub-directory of a toplevel directory.

    Each direct sub-directory of ``tl_dir`` is passed as the only argument to
    ``./doPrediction.sh`` (resolved relative to the current working
    directory). The runs are executed locally, one after another. For a
    sub-directory called ``<name>`` the script's stdout is written to
    ``<name>.out`` and its stderr to ``<name>.err`` in the current working
    directory; existing files of that name are overwritten.

    Once all runs are done the results can be collected.

    Args:
        tl_dir: path of the toplevel directory whose sub-directories are
            processed. Plain files inside it are ignored.

    Returns:
        A dict mapping each sub-directory name to the exit status of its
        ``./doPrediction.sh`` run, or to ``None`` if the script could not be
        started at all. Empty if ``tl_dir`` has no sub-directories.

    Raises:
        OSError: if ``tl_dir`` cannot be listed or a log file cannot be opened.
    """
    # Function-scope import so this block does not depend on a change to the
    # module's import section.
    import subprocess

    exit_codes = {}

    # Sorted so that the order of the runs is reproducible.
    for run_name in sorted(os.listdir(tl_dir)):
        current_dir = os.path.join(tl_dir, run_name)
        if not os.path.isdir(current_dir):
            continue

        with open(run_name + '.out', 'w') as out_fh:
            with open(run_name + '.err', 'w') as err_fh:
                try:
                    # An argument list (no shell) keeps directory names with
                    # spaces or shell metacharacters from being re-interpreted.
                    status = subprocess.call(
                        ['./doPrediction.sh', current_dir],
                        stdout=out_fh,
                        stderr=err_fh,
                    )
                except OSError as exc:
                    # Best effort: a script that cannot be launched must not
                    # abort the remaining runs. Record the reason where the
                    # shell would have reported it.
                    err_fh.write('could not start ./doPrediction.sh: %s\n' % exc)
                    status = None

        exit_codes[run_name] = status

    return exit_codes
29
if __name__ == '__main__':
    # Script entry point: the first command line argument is the toplevel
    # directory whose sub-directories are handed to perform_prediction().
    if len(sys.argv) < 2:
        sys.exit('usage: %s <toplevel_dir>' % sys.argv[0])

    toplevel_dir = sys.argv[1]

    # Explicit check instead of assert: asserts are stripped under -O, and a
    # plain file would pass an existence test but cannot be listed.
    if not os.path.isdir(toplevel_dir):
        sys.exit('%s: not a directory: %s' % (sys.argv[0], toplevel_dir))

    perform_prediction(toplevel_dir)
35
36
37
38 #if e1_b_off != None:
39 # exon1Begin.append(e1_b_off)
40 # exon1End.append(e1_e_off)
41 # exon2Begin.append(e2_b_off)
42 # exon2End.append(e2_e_off)
43 #else:
44 # allWrongExons.append((newExons,exons))
45
46 logfile = self.logfh
47
48 #if e1_b_off == 0 and e1_e_off == 0 and e2_b_off == 0 and e2_e_off == 0:
49 # print >> logfile, 'example %d correct' % exampleIdx
50 #else:
51 # print >> logfile, 'example %d wrong' % exampleIdx
52
53 #e1Begin_pos,e1Begin_neg,e1End_pos,e1End_neg,mean_e1Begin_neg,mean_e1End_neg = self.evaluatePositions(exon1Begin,exon1End)
54 #e2Begin_pos,e2Begin_neg,e2End_pos,e2End_neg,mean_e2Begin_neg,mean_e2End_neg = self.evaluatePositions(exon2Begin,exon2End)
55
56 #all_pos_correct = 0
57 #for idx in range(len(exon1Begin)):
58 # if exon1Begin[idx] == 0 and exon1End[idx] == 0\
59 # and exon2Begin[idx] == 0 and exon2End[idx] == 0:
60 # all_pos_correct += 1
61
62 #logfile = self.logfh
63 #print >> logfile, 'Total num. of examples: %d' % numExamples
64 #print >> logfile, 'Number of total correct examples: %d' % all_pos_correct
65 #print >> logfile, 'Correct positions:\t\t%d\t%d\t%d\t%d' % (len(e1Begin_pos),len(e1End_pos),len(e2Begin_pos),len(e2End_pos))
66 #print >> logfile, 'Incorrect positions:\t\t%d\t%d\t%d\t%d' % (len(e1Begin_neg),len(e1End_neg),len(e2Begin_neg),len(e2End_neg))
67 #print >> logfile, 'Mean of pos. offset:\t\t%.2f\t%.2f\t%.2f\t%.2f' % (mean_e1Begin_neg,mean_e1End_neg,mean_e2Begin_neg,mean_e2End_neg)
68
69 #print 'Total num. of examples: %d' % numExamples
70 #print 'Number of total correct examples: %d' % all_pos_correct
71 #print 'Correct positions:\t\t%d\t%d\t%d\t%d' % (len(e1Begin_pos),len(e1End_pos),len(e2Begin_pos),len(e2End_pos))
72 #print 'Incorrect positions:\t\t%d\t%d\t%d\t%d' % (len(e1Begin_neg),len(e1End_neg),len(e2Begin_neg),len(e2End_neg))
73 #print 'Mean of pos. offset:\t\t%.2f\t%.2f\t%.2f\t%.2f' % (mean_e1Begin_neg,mean_e1End_neg,mean_e2Begin_neg,mean_e2End_neg)
def evaluatePositions(self, eBegin, eEnd):
    """
    Split begin/end values into zero and non-zero entries and average the latter.

    A value equal to 0 is collected in the ``*_pos`` list, every other value
    in the ``*_neg`` list. (Presumably the values are position offsets of
    predicted exon boundaries, 0 meaning an exact hit -- confirm with the
    caller.)

    Args:
        eBegin: sequence of numeric values for the begin positions.
        eEnd: sequence of numeric values for the end positions.

    Returns:
        The tuple ``(eBegin_pos, eBegin_neg, eEnd_pos, eEnd_neg,
        mean_eBegin_neg, mean_eEnd_neg)``. The first four items are lists.
        Each mean is the float average of the matching ``*_neg`` list, or
        the integer ``-1`` if that list is empty. Note that a genuine
        average of -1.0 compares equal to this marker.
    """

    def mean_or_marker(values):
        # Explicit empty check instead of catching the division error with a
        # bare except, which would also have hidden unrelated failures.
        if not values:
            return -1
        return sum(values) / (1.0 * len(values))

    eBegin_pos = [elem for elem in eBegin if elem == 0]
    eBegin_neg = [elem for elem in eBegin if elem != 0]
    eEnd_pos = [elem for elem in eEnd if elem == 0]
    eEnd_neg = [elem for elem in eEnd if elem != 0]

    mean_eBegin_neg = mean_or_marker(eBegin_neg)
    mean_eEnd_neg = mean_or_marker(eEnd_neg)

    return eBegin_pos, eBegin_neg, eEnd_pos, eEnd_neg, mean_eBegin_neg, mean_eEnd_neg