+ fixed a bug in the C++ interface
[qpalma.git] / standalone / palma / __init__.py
1 """ --- palma ---
2 Some useful functions and classes used by palma.py
3 """
4
5 __version__ = '0.3.3'
6
7 # Copyright notice string
8 __copyright__ = """\
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
13
14 Written (W) 2006 Uta Schulze
15 Copyright (C) 2006 Max-Planck-Society
16 """
17
18
19 import sys
20 import numpy
21 import palma_utils as pu
22 import alignment_tool as at
23
24
25 class alignment_info:
26 nr_paths_found = -1
27 score = []
28 match = []
29 mism = []
30 qGapb = []
31 tGapb = []
32 qStart = []
33 qEnd = []
34 tStart = []
35 tEnd = []
36 num_exons = []
37 qExonSizes = []
38 qStarts = []
39 qEnds = []
40 tExonSizes = []
41 tStarts = []
42 tEnds = []
43 exon_length = []
44 identity = []
45 comparison = []
46 qIDX = []
47 tIDX = []
48 dna_letters = []
49 est_letters = []
50
51 def do_alignment(dna, dna_len, est, est_len, reduce_region, model, t_strand,\
52 translation, comparison_char, don_support_values, acc_support_values):
53 import time
54
55 nr_paths = 1
56 mlen = 6 #length of match matrix, const
57
58 align_info = alignment_info()
59 align_info.score = []
60 align_info.match = []
61 align_info.mism = []
62 align_info.qGapb = []
63 align_info.tGapb = []
64 align_info.qStart = []
65 align_info.qEnd = []
66 align_info.tStart = []
67 align_info.tEnd = []
68 align_info.num_exons = []
69 align_info.qExonSizes = []
70 align_info.qStarts = []
71 align_info.qEnds = []
72 align_info.tExonSizes = []
73 align_info.tStarts = []
74 align_info.tEnds = []
75 align_info.exon_length = []
76 align_info.identity = []
77 align_info.comparison = []
78 align_info.qIDX = []
79 align_info.tIDX = []
80 align_info.dna_letters = []
81 align_info.est_letters = []
82 align_info.splice_align = []
83
84 t0 = time.time()
85 #<------------------------------------------------------------------------------>
86 # assigns parameter [h(30),d(30),a(30),mmatrix(36)]
87 # computes scores for donor/acceptor sites using p.l. function
88 #<------------------------------------------------------------------------------>
89 h,d,a,mmatrix = pu.set_param(model)
90 donor = don_support_values
91 acceptor = acc_support_values
92 for cnt in range(dna_len):
93 if numpy.isfinite(don_support_values[cnt]):
94 donor[cnt] = pu.penalty_lookup(d,don_support_values[cnt])
95 if numpy.isfinite(acc_support_values[cnt]):
96 acceptor[cnt] = pu.penalty_lookup(a,acc_support_values[cnt])
97 del don_support_values
98 del acc_support_values
99 #<------------------------------------------------------------------------------>
100
101 #print time.time() - t0
102
103 #<------------------------------------------------------------------------------>
104 # convert list to double-Array (py -> c++),
105 #<------------------------------------------------------------------------------>
106 mmatrix_array = at.createDoubleArrayFromList(mmatrix)
107 donor_array = at.createDoubleArrayFromList(donor)
108 acceptor_array = at.createDoubleArrayFromList(acceptor)
109 h_struct = at.penalty()
110 h_struct.len = h.len
111 h_struct.limits = at.createDoubleArrayFromList(h.limits)
112 h_struct.penalties = at.createDoubleArrayFromList(h.penalties)
113 h_struct.name = h.name
114 h_struct.max_len = h.max_len
115 h_struct.min_len = h.min_len
116 h_struct.transform = h.transform
117 #<------------------------------------------------------------------------------>
118
119 #print time.time() - t0
120
121 #<------------------------------------------------------------------------------>
122 # call myalign
123 #<------------------------------------------------------------------------------>
124 BLA = at.align_info() ; #not necessary
125 try:
126 BLA = at.myalign(reduce_region, mlen, dna_len, dna, est_len, est, h_struct,\
127 mmatrix_array, donor_array, acceptor_array)
128 except MemoryError:
129 raise(MemoryError)
130
131 #print time.time() - t0
132
133 #<------------------------------------------------------------------------------>
134
135
136 #<------------------------------------------------------------------------------>
137 # myalign Output - convert arrays to list (c++ -> py)
138 #<------------------------------------------------------------------------------>
139 splice_align_array = at.createListFromIntArray(BLA.splice_align, dna_len*nr_paths)
140 est_align_array = at.createListFromIntArray(BLA.est_align, est_len*nr_paths)
141 mmatrix_param_array = at.createListFromIntArray(BLA.mmatrix_param, mlen*mlen*nr_paths)
142 alignment_scores = at.createListFromDoubleArray(BLA.alignment_scores,nr_paths)
143 alignment_length = at.createListFromIntArray(BLA.alignment_length, nr_paths)
144 #print "before crash on intel mac"
145 aligned_dna_array = at.createListFromIntArray(BLA.aligned_dna, alignment_length[0])
146 aligned_est_array = at.createListFromIntArray(BLA.aligned_est, alignment_length[0])
147 #print "after crash on intel mac"
148 #<------------------------------------------------------------------------------>
149
150 #print alignment_scores
151
152 #<------------------------------------------------------------------------------>
153 # free memory
154 #<------------------------------------------------------------------------------>
155 at.delete_intArray(BLA.splice_align)
156 at.delete_intArray(BLA.est_align)
157 at.delete_intArray(BLA.mmatrix_param)
158 at.delete_intArray(BLA.aligned_dna)
159 at.delete_intArray(BLA.aligned_est)
160 at.delete_doubleArray(BLA.alignment_scores)
161 at.delete_intArray(BLA.alignment_length)
162
163 del BLA
164 del h_struct
165 at.delete_doubleArray(mmatrix_array)
166 at.delete_doubleArray(donor_array)
167 at.delete_doubleArray(acceptor_array)
168 #<------------------------------------------------------------------------------>
169
170
171 #<------------------------------------------------------------------------------>
172 # myalign output: creating lists of information
173 # i.th entry in list is alignment for i.th path (nr_path many)
174 #<------------------------------------------------------------------------------>
175 mmatrix_param = []
176 splice_align = []
177 est_align = []
178 dna_numbers = []
179 est_numbers = []
180 dna_letters = []
181 est_letters = []
182
183 for i in range(nr_paths):
184 result_length = alignment_length[i] #length of i.th alignment
185 if result_length == 0:
186 #no alignment found
187 nr_paths = i-1+1 #+1 because index starts with zero
188 alignment_scores = alignment_scores[0:nr_paths]
189 alignment_length = alignment_length[0:nr_paths]
190 assert(len(mmatrix_param)==nr_paths)
191 assert(len(splice_align)==nr_paths)
192 assert(len(est_align)==nr_paths)
193 assert(len(dna_numbers)==nr_paths)
194 assert(len(est_numbers)==nr_paths)
195 assert(len(dna_letters)==nr_paths)
196 assert(len(est_letters)==nr_paths)
197 break
198
199 #match matrix for the i.th path (numbers of matches, mismatches and gaps):
200 mmatrix_param.append(mmatrix_param_array[i*mlen*mlen:(i+1)*mlen*mlen])
201
202 #splice_align and est_align for the i.th path (show dangling ends and exons/introns):
203 splice_align.append(splice_align_array[i*dna_len:(i+1)*dna_len])
204 est_align.append(est_align_array[i*est_len:(i+1)*est_len])
205
206 dna_numbers.append(aligned_dna_array[0:result_length]) #alignment in numbers (see translation)
207 est_numbers.append(aligned_est_array[0:result_length]) #alignment in numbers (see translation)
208 aligned_dna_array = aligned_dna_array[result_length:] #shorten aligned_dna_array
209 aligned_est_array = aligned_est_array[result_length:] #shorten aligned_est_array
210
211 dna_letters.append(reduce(lambda x,y: x+y, map(lambda x: translation[int(x)],dna_numbers[i])))
212 est_letters.append(reduce(lambda x,y: x+y, map(lambda x: translation[int(x)],est_numbers[i])))
213
214 if nr_paths==0:
215 #absolutely no alignment found
216 align_info.nr_paths_found = nr_paths
217 return align_info
218
219 assert(len(aligned_dna_array)==0)
220 assert(len(aligned_est_array)==0)
221 #<------------------------------------------------------------------------------>
222
223
224 #<------------------------------------------------------------------------------>
225 # analysing alignments (n-best)
226 #<------------------------------------------------------------------------------>
227 if 0:
228 print alignment_scores
229 print alignment_length
230 print mmatrix_param
231 print splice_align
232 print est_align
233 print dna_numbers
234 print est_numbers
235 print dna_letters
236 print est_letters
237
238 align_info.nr_paths_found = nr_paths
239
240 ai = [] ;
241 for i in range(nr_paths):
242 (match, mism, qGapb, tGapb) = pu.evaluate_mmatrix(mmatrix_param[i])
243 if (match +mism +qGapb +tGapb) == 0:
244 #what to do then ...?, don't think, this happpens
245 align_info.nr_paths_found -= 1
246 continue
247
248 (qStart, qEnd, tStart, tEnd, num_exons, qExonSizes, qStarts, qEnds, tExonSizes, tStarts, tEnds) =\
249 pu.get_splice_info(splice_align[i], est_align[i])
250 #print tStarts
251
252 score = alignment_scores[i]
253
254 (exon_length, identity, ssQuality, exonQuality, comparison, qIDX, tIDX) =\
255 pu.comp_identity(dna_letters[i], est_letters[i], t_strand, qStart, tStart, translation, comparison_char)
256
257 first_identity = None
258 last_identity = None
259 for i in range(len(comparison)):
260 if comparison[i] == '|' and first_identity is None: first_identity = i
261 #print i,comparison[i],first_identity,qIDX[i],tIDX[i]
262 if comparison[-i] == '|' and last_identity is None: last_identity = len(comparison) - i - 1
263 #print first_identity, last_identity, len(tIDX), len(qIDX)
264 #print tIDX[first_identity], tIDX[last_identity], qIDX[first_identity], qIDX[last_identity]
265 #print tStart, tEnd, qStart, qEnd
266 #exon_length[0] = exon_length[0] - tIDX[first_identity] - tStart + 2
267 #exon_length[-1] = exon_length[-1] - tEnd + tIDX[last_identity] + 1
268 tStart = tIDX[first_identity]
269 tStarts[0] = tIDX[first_identity]
270 tEnd = tIDX[last_identity]
271 tEnds[-1] = tIDX[last_identity]
272 qStart = qIDX[first_identity]
273 qStarts[0] = qIDX[first_identity]
274 qEnd = qIDX[last_identity]
275 qEnds[-1] = qIDX[last_identity]
276
277 align_info.score = score
278 align_info.match = match
279 align_info.mism = mism
280 align_info.qGapb = qGapb
281 align_info.tGapb = tGapb
282 align_info.qStart = qStart
283 align_info.qEnd = qEnd
284 align_info.tStart = tStart
285 align_info.tEnd = tEnd
286 align_info.num_exons = num_exons
287 align_info.qExonSizes = qExonSizes
288 align_info.qStarts = qStarts
289 align_info.qEnds = qEnds
290 align_info.tExonSizes =tExonSizes
291 align_info.tStarts = tStarts
292 align_info.tEnds = tEnds
293 align_info.exon_length = exon_length
294 align_info.identity = identity
295 align_info.ssQuality = ssQuality
296 align_info.exonQuality = exonQuality
297 align_info.comparison = comparison
298 align_info.qIDX = qIDX
299 align_info.tIDX = tIDX
300 align_info.dna_letters = dna_letters
301 align_info.est_letters = est_letters
302
303 ai.append(align_info)
304
305 #<------------------------------------------------------------------------------>
306
307 return ai