2 Some useful functions and classes used by palma.py
7 # Copyright notice string
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 Written (W) 2006 Uta Schulze
15 Copyright (C) 2006 Max-Planck-Society
21 import palma_utils
as pu
22 import alignment_tool
as at
51 def do_alignment(dna
, dna_len
, est
, est_len
, reduce_region
, model
, t_strand
,\
52 translation
, comparison_char
, don_support_values
, acc_support_values
):
56 mlen
= 6 #length of match matrix, const
58 align_info
= alignment_info()
64 align_info
.qStart
= []
66 align_info
.tStart
= []
68 align_info
.num_exons
= []
69 align_info
.qExonSizes
= []
70 align_info
.qStarts
= []
72 align_info
.tExonSizes
= []
73 align_info
.tStarts
= []
75 align_info
.exon_length
= []
76 align_info
.identity
= []
77 align_info
.comparison
= []
80 align_info
.dna_letters
= []
81 align_info
.est_letters
= []
82 align_info
.splice_align
= []
85 #<------------------------------------------------------------------------------>
86 # assigns parameter [h(30),d(30),a(30),mmatrix(36)]
87 # computes scores for donor/acceptor sites using p.l. function
88 #<------------------------------------------------------------------------------>
89 h
,d
,a
,mmatrix
= pu
.set_param(model
)
90 donor
= don_support_values
91 acceptor
= acc_support_values
92 for cnt
in range(dna_len
):
93 if numpy
.isfinite(don_support_values
[cnt
]):
94 donor
[cnt
] = pu
.penalty_lookup(d
,don_support_values
[cnt
])
95 if numpy
.isfinite(acc_support_values
[cnt
]):
96 acceptor
[cnt
] = pu
.penalty_lookup(a
,acc_support_values
[cnt
])
97 del don_support_values
98 del acc_support_values
99 #<------------------------------------------------------------------------------>
101 #print time.time() - t0
103 #<------------------------------------------------------------------------------>
104 # convert list to double-Array (py -> c++),
105 #<------------------------------------------------------------------------------>
106 mmatrix_array
= at
.createDoubleArrayFromList(mmatrix
)
107 donor_array
= at
.createDoubleArrayFromList(donor
)
108 acceptor_array
= at
.createDoubleArrayFromList(acceptor
)
109 h_struct
= at
.penalty()
111 h_struct
.limits
= at
.createDoubleArrayFromList(h
.limits
)
112 h_struct
.penalties
= at
.createDoubleArrayFromList(h
.penalties
)
113 h_struct
.name
= h
.name
114 h_struct
.max_len
= h
.max_len
115 h_struct
.min_len
= h
.min_len
116 h_struct
.transform
= h
.transform
117 #<------------------------------------------------------------------------------>
119 #print time.time() - t0
121 #<------------------------------------------------------------------------------>
123 #<------------------------------------------------------------------------------>
124 BLA
= at
.align_info() ; #not necessary
126 BLA
= at
.myalign(reduce_region
, mlen
, dna_len
, dna
, est_len
, est
, h_struct
,\
127 mmatrix_array
, donor_array
, acceptor_array
)
131 #print time.time() - t0
133 #<------------------------------------------------------------------------------>
136 #<------------------------------------------------------------------------------>
137 # myalign Output - convert arrays to list (c++ -> py)
138 #<------------------------------------------------------------------------------>
139 splice_align_array
= at
.createListFromIntArray(BLA
.splice_align
, dna_len
*nr_paths
)
140 est_align_array
= at
.createListFromIntArray(BLA
.est_align
, est_len
*nr_paths
)
141 mmatrix_param_array
= at
.createListFromIntArray(BLA
.mmatrix_param
, mlen
*mlen
*nr_paths
)
142 alignment_scores
= at
.createListFromDoubleArray(BLA
.alignment_scores
,nr_paths
)
143 alignment_length
= at
.createListFromIntArray(BLA
.alignment_length
, nr_paths
)
144 #print "before crash on intel mac"
145 aligned_dna_array
= at
.createListFromIntArray(BLA
.aligned_dna
, alignment_length
[0])
146 aligned_est_array
= at
.createListFromIntArray(BLA
.aligned_est
, alignment_length
[0])
147 #print "after crash on intel mac"
148 #<------------------------------------------------------------------------------>
150 #print alignment_scores
152 #<------------------------------------------------------------------------------>
154 #<------------------------------------------------------------------------------>
155 at
.delete_intArray(BLA
.splice_align
)
156 at
.delete_intArray(BLA
.est_align
)
157 at
.delete_intArray(BLA
.mmatrix_param
)
158 at
.delete_intArray(BLA
.aligned_dna
)
159 at
.delete_intArray(BLA
.aligned_est
)
160 at
.delete_doubleArray(BLA
.alignment_scores
)
161 at
.delete_intArray(BLA
.alignment_length
)
165 at
.delete_doubleArray(mmatrix_array
)
166 at
.delete_doubleArray(donor_array
)
167 at
.delete_doubleArray(acceptor_array
)
168 #<------------------------------------------------------------------------------>
171 #<------------------------------------------------------------------------------>
172 # myalign output: creating lists of information
173 # i.th entry in list is alignment for i.th path (nr_path many)
174 #<------------------------------------------------------------------------------>
183 for i
in range(nr_paths
):
184 result_length
= alignment_length
[i
] #length of i.th alignment
185 if result_length
== 0:
187 nr_paths
= i
-1+1 #+1 because index starts with zero
188 alignment_scores
= alignment_scores
[0:nr_paths
]
189 alignment_length
= alignment_length
[0:nr_paths
]
190 assert(len(mmatrix_param
)==nr_paths
)
191 assert(len(splice_align
)==nr_paths
)
192 assert(len(est_align
)==nr_paths
)
193 assert(len(dna_numbers
)==nr_paths
)
194 assert(len(est_numbers
)==nr_paths
)
195 assert(len(dna_letters
)==nr_paths
)
196 assert(len(est_letters
)==nr_paths
)
199 #match matrix for the i.th path (numbers of matches, mismatches and gaps):
200 mmatrix_param
.append(mmatrix_param_array
[i
*mlen
*mlen
:(i
+1)*mlen
*mlen
])
202 #splice_align and est_align for the i.th path (show dangling ends and exons/introns):
203 splice_align
.append(splice_align_array
[i
*dna_len
:(i
+1)*dna_len
])
204 est_align
.append(est_align_array
[i
*est_len
:(i
+1)*est_len
])
206 dna_numbers
.append(aligned_dna_array
[0:result_length
]) #alignment in numbers (see translation)
207 est_numbers
.append(aligned_est_array
[0:result_length
]) #alignment in numbers (see translation)
208 aligned_dna_array
= aligned_dna_array
[result_length
:] #shorten aligned_dna_array
209 aligned_est_array
= aligned_est_array
[result_length
:] #shorten aligned_est_array
211 dna_letters
.append(reduce(lambda x
,y
: x
+y
, map(lambda x
: translation
[int(x
)],dna_numbers
[i
])))
212 est_letters
.append(reduce(lambda x
,y
: x
+y
, map(lambda x
: translation
[int(x
)],est_numbers
[i
])))
215 #absolutely no alignment found
216 align_info
.nr_paths_found
= nr_paths
219 assert(len(aligned_dna_array
)==0)
220 assert(len(aligned_est_array
)==0)
221 #<------------------------------------------------------------------------------>
224 #<------------------------------------------------------------------------------>
225 # analysing alignments (n-best)
226 #<------------------------------------------------------------------------------>
228 print alignment_scores
229 print alignment_length
238 align_info
.nr_paths_found
= nr_paths
241 for i
in range(nr_paths
):
242 (match
, mism
, qGapb
, tGapb
) = pu
.evaluate_mmatrix(mmatrix_param
[i
])
243 if (match
+mism
+qGapb
+tGapb
) == 0:
244 #what to do then ...?, don't think, this happpens
245 align_info
.nr_paths_found
-= 1
248 (qStart
, qEnd
, tStart
, tEnd
, num_exons
, qExonSizes
, qStarts
, qEnds
, tExonSizes
, tStarts
, tEnds
) =\
249 pu
.get_splice_info(splice_align
[i
], est_align
[i
])
252 score
= alignment_scores
[i
]
254 (exon_length
, identity
, ssQuality
, exonQuality
, comparison
, qIDX
, tIDX
) =\
255 pu
.comp_identity(dna_letters
[i
], est_letters
[i
], t_strand
, qStart
, tStart
, translation
, comparison_char
)
257 first_identity
= None
259 for i
in range(len(comparison
)):
260 if comparison
[i
] == '|' and first_identity
is None: first_identity
= i
261 #print i,comparison[i],first_identity,qIDX[i],tIDX[i]
262 if comparison
[-i
] == '|' and last_identity
is None: last_identity
= len(comparison
) - i
- 1
263 #print first_identity, last_identity, len(tIDX), len(qIDX)
264 #print tIDX[first_identity], tIDX[last_identity], qIDX[first_identity], qIDX[last_identity]
265 #print tStart, tEnd, qStart, qEnd
266 #exon_length[0] = exon_length[0] - tIDX[first_identity] - tStart + 2
267 #exon_length[-1] = exon_length[-1] - tEnd + tIDX[last_identity] + 1
268 tStart
= tIDX
[first_identity
]
269 tStarts
[0] = tIDX
[first_identity
]
270 tEnd
= tIDX
[last_identity
]
271 tEnds
[-1] = tIDX
[last_identity
]
272 qStart
= qIDX
[first_identity
]
273 qStarts
[0] = qIDX
[first_identity
]
274 qEnd
= qIDX
[last_identity
]
275 qEnds
[-1] = qIDX
[last_identity
]
277 align_info
.score
= score
278 align_info
.match
= match
279 align_info
.mism
= mism
280 align_info
.qGapb
= qGapb
281 align_info
.tGapb
= tGapb
282 align_info
.qStart
= qStart
283 align_info
.qEnd
= qEnd
284 align_info
.tStart
= tStart
285 align_info
.tEnd
= tEnd
286 align_info
.num_exons
= num_exons
287 align_info
.qExonSizes
= qExonSizes
288 align_info
.qStarts
= qStarts
289 align_info
.qEnds
= qEnds
290 align_info
.tExonSizes
=tExonSizes
291 align_info
.tStarts
= tStarts
292 align_info
.tEnds
= tEnds
293 align_info
.exon_length
= exon_length
294 align_info
.identity
= identity
295 align_info
.ssQuality
= ssQuality
296 align_info
.exonQuality
= exonQuality
297 align_info
.comparison
= comparison
298 align_info
.qIDX
= qIDX
299 align_info
.tIDX
= tIDX
300 align_info
.dna_letters
= dna_letters
301 align_info
.est_letters
= est_letters
303 ai
.append(align_info
)
305 #<------------------------------------------------------------------------------>