54d3beb90c0cd3fdcdb00a0a094f6c351a1b14d3
2 # -*- coding: utf-8 -*-
14 def create_alignment_file(current_loc
,out_fname
):
16 #qparser.parse_reads(filtered_reads)
18 out_fh
= open(out_fname
,'w+')
20 allPredictions
= cPickle
.load(open(current_loc
))
24 for current_prediction
in allPredictions
:
25 id = current_prediction
['id']
27 #current_ground_truth = qparser.fetch_read(id)
28 #true_cut = current_ground_truth['true_cut']
29 #seq = current_ground_truth['seq']
30 #q1 = current_ground_truth['prb']
32 seq
= current_prediction
['est']
33 dna
= current_prediction
['dna']
38 chromo
= current_prediction
['chr']
39 strand
= current_prediction
['strand']
40 start_pos
= current_prediction
['start_pos']
41 alignment
= current_prediction
['alignment']
43 DPScores
= current_prediction
['DPScores']
45 predExons
= current_prediction
['predExons']
46 predExons
= [e
+start_pos
for e
in predExons
]
49 (qStart
, qEnd
, tStart
, tEnd
, num_exons
, qExonSizes
, qStarts
, qEnds
,\
50 tExonSizes
,tStarts
, tEnds
) = alignment
55 predExons
= numpy
.mat(predExons
).reshape(len(predExons
)/2,2)
57 match
= predExons
[0,1] - predExons
[0,0]
58 if predExons
.shape
== (2,2):
59 match
+= predExons
[1,1] - predExons
[1,0]
70 if predExons
.shape
== (2,2):
71 Tgapbs
+= predExons
[1,0] - predExons
[0,1]
74 # &strand, Qname, &Qsize,
75 # &Qstart, &Qend, Tname, &Tsize,
76 # &Tstart, &Tend, &blockcnt,exonsizes_str,Qstarts_str,Tstarts_str
81 # we enforce equal exons sizes for q and t because we are missing indel
82 # positions of the alignment
84 if qExonSizes
[0] != tExonSizes
[0]:
87 Qname
= '%d(%2.2f)'%(id,DPScores
.tolist()[0][0])
91 Tname
= 'CHR%d'%chromo
95 Tsize
= tEnd
+1 + start_pos
96 Tstart
= tStart
+ start_pos
97 Tend
= tEnd
+ start_pos
99 exonsizes_str
= str(tExonSizes
)[1:-1].replace(' ','')
100 Qstarts_str
= str(qStarts
)[1:-1].replace(' ','')
101 Tstarts_str
= str(map(lambda x
: x
+start_pos
,tStarts
))[1:-1].replace(' ','')
104 new_line
= '%d\t%d\t%s\t%s\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t%s\t%s\t%s\t%s\t%s\t%s\n' %\
105 (id,chromo
,strand
,seq
,str(q1
)[1:-1],start_pos
,qStart
,qEnd
,tStart
,tEnd
,num_exons
,\
106 str(qExonSizes
)[1:-1].replace(' ',''),str(qStarts
)[1:-1].replace(' ',''),\
107 str(qEnds
)[1:-1].replace(' ',''),str(tExonSizes
)[1:-1].replace(' ',''),\
108 str(tStarts
)[1:-1].replace(' ',''),str(tEnds
)[1:-1].replace(' ',''))
112 #new_line = "%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%c\t%s\t%d\t%d\t%d\t%s\t%d\t%d\t%d\t%d\t%s\t%s\t%s\n"\
113 #% (match, mismatch, repmatch, Ns, Qgapcnt, Qgapbs,\
114 #Tgapcnt, Tgapbs, strand, Qname, Qsize, Qstart, Qend,\
115 #Tname, Tsize, Tstart, Tend,\
116 #blockcnt,exonsizes_str,Qstarts_str,Tstarts_str) #str(predExons))
118 out_fh
.write(new_line
)
122 if __name__
== '__main__':
123 current_dir
= sys
.argv
[1]
125 create_alignment_file(current_dir
,out_fh
)