+ minor changes
[qpalma.git] / scripts / createAlignmentFileFromPrediction.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import cPickle
5 import sys
6 import pdb
7 import os
8 import os.path
9 import math
10 from qpalma.parsers import *
11
12 from Evaluation import load_chunks
13
14 def prediction_on(current_dir,filtered_reads,out_fname):
15
16 #print 'parsing filtered reads..'
17 #all_filtered_reads = parse_filtered_reads(filtered_reads)
18 #print 'found %d filtered reads' % len(all_filtered_reads)
19
20 import qparser
21 qparser.parse_reads(filtered_reads)
22
23 out_fh = open(out_fname,'w+')
24
25 allPredictions = load_chunks(current_dir)
26 allPositions = {}
27
28 for current_prediction in allPredictions:
29 id = current_prediction['id']
30
31 #current_ground_truth = all_filtered_reads[id]
32 current_ground_truth = qparser.fetch_read(id)
33
34 true_cut = current_ground_truth['true_cut']
35 seq = current_ground_truth['seq']
36 q1 = current_ground_truth['prb']
37
38 chr = current_prediction['chr']
39 strand = current_prediction['strand']
40 #true_cut = current_prediction['true_cut']
41 start_pos = current_prediction['start_pos']
42 alignment = current_prediction['alignment']
43
44 predExons = current_prediction['predExons']
45 predExons = [e+start_pos for e in predExons]
46
47 p_start = current_ground_truth['p_start']
48 e_stop = current_ground_truth['exon_stop']
49 e_start = current_ground_truth['exon_start']
50 p_stop = current_ground_truth['p_stop']
51 cut_pos = current_ground_truth['true_cut']
52
53 correct_prediction = False
54
55 #if len(predExons) == 4:
56 # spliced_flag = True
57 # predExons[1] -= 1
58 # predExons[3] -= 1
59
60 # if p_start == predExons[0] and e_stop == predExons[1] and\
61 # e_start == predExons[2] and p_stop == predExons[3]:
62 # correct_prediction = True
63
64 # if p_start == predExons[0] and e_stop == predExons[1] and\
65 # e_start == predExons[2] and p_stop+1 == predExons[3]:
66 # print 'special case'
67
68 #elif len(predExons) == 2:
69 # spliced_flag = False
70 # predExons[1] -= 1
71
72 # if math.fabs(p_start - predExons[0]) <= 0 and math.fabs(p_stop - predExons[1]) <= 2:
73 # correct_prediction = True
74 #
75 #else:
76 # pass
77
78 (qStart, qEnd, tStart, tEnd, num_exons, qExonSizes, qStarts, qEnds,\
79 tExonSizes,tStarts, tEnds) = alignment
80
81 new_line = '%d\t%d\t%s\t%s\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t%s\t%s\t%s\t%s\t%s\t%s\n' %\
82 (id,chr,strand,seq,str(q1)[1:-1],start_pos,qStart,qEnd,tStart,tEnd,num_exons,\
83 str(qExonSizes)[1:-1].replace(' ',''),str(qStarts)[1:-1].replace(' ',''),\
84 str(qEnds)[1:-1].replace(' ',''),str(tExonSizes)[1:-1].replace(' ',''),\
85 str(tStarts)[1:-1].replace(' ',''),str(tEnds)[1:-1].replace(' ',''))
86
87 out_fh.write(new_line)
88
89 return allPositions
90
91
92 #def writePredictions(fname,allPositions):
93 #
94 #
95 # allEntries = {}
96 #
97 # for line in open('/fml/ag-raetsch/share/projects/qpalma/solexa/allReads.pipeline'):
98 # line = line.strip()
99 # id,seq,q1,q2,q3 = line.split()
100 # id = int(id)
101 #
102 # allEntries[id] = (seq,q1,q2,q3)
103 #
104 #
105 # for id,elems in allPositions.items():
106 # seq,q1,q2,q3 = allEntries[id]
107 # chr,strand,start_pos,true_cut,p1,p2,p3,p4,alignment = elems
108 #
109 # p1 += start_pos
110 # p2 += start_pos
111 # p3 += start_pos
112 # p4 += start_pos
113 #
114 # out_fh.close()
115 #
116
117 #def collect_prediction(current_dir):
118 # """
119 # Given the toplevel directory this function takes care that for each distinct
120 # experiment the training and test predictions are evaluated.
121 #
122 # """
123 # train_suffix = '_allPredictions_TRAIN'
124 # test_suffix = '_allPredictions_TEST'
125 #
126 # run_name = 'run_+_quality_+_splicesignals_+_intron_len_1'
127 #
128 # jp = os.path.join
129 # b2s = ['-','+']
130 #
131 # #currentRun = cPickle.load(open(jp(current_dir,'run_object_1.pickle')))
132 # #QFlag = currentRun['enable_quality_scores']
133 # #SSFlag = currentRun['enable_splice_signals']
134 # #ILFlag = currentRun['enable_intron_length']
135 # #currentRunId = '%s%s%s' % (b2s[QFlag],b2s[SSFlag],b2s[ILFlag])
136 #
137 # filename = jp(current_dir,run_name)+test_suffix
138 # print 'Prediction on: %s' % filename
139 # test_result = prediction_on(filename)
140 #
141 # fname = 'predictions.txt'
142 # writePredictions(fname,test_result)
143 #
144
145 if __name__ == '__main__':
146 current_dir = sys.argv[1]
147 filtered_reads = sys.argv[2]
148 out_fh = sys.argv[3]
149 prediction_on(current_dir,filtered_reads,out_fh)