2 # -*- coding: utf-8 -*-
4 from numpy
.matlib
import zeros
def computeSpliceAlign(dna, exons):
    """Build the per-position splice labelling of *dna* and the match counts.

    Exon coordinates are half-open: ``exons[i, 0]`` is the first position of
    exon ``i`` and ``exons[i, 1]`` is one position to the right of its last
    nucleotide.  Consequently an intron (splice region) starts where the
    preceding exon ends and stops where the following exon starts (this
    applies to the interior boundaries only, not to the very first start or
    the very last end)::

        cccccA1cccccE1cccccA2 ... AmcccccEmcccccccc
        cccccXXcccccA1cccccE1 ... Em-1cccXXcccccccc

    Args:
        dna: Sequence of nucleotide characters (a ``str`` or a list of
            one-character strings).  Exonic characters are expected to be
            lowercase ``a``, ``c``, ``g``, ``t`` or ``n``.
        exons: 2-D array-like with a ``shape`` attribute and ``[row, col]``
            indexing (e.g. a NumPy array); one row per exon holding
            ``(start, end)`` as described above.

    Returns:
        A tuple ``(SpliceAlign, trueWeightMatch)``.

        ``SpliceAlign`` is a list with one integer label per position of
        *dna*: ``0`` exon, ``1`` donor position, ``3`` intron interior,
        ``2`` acceptor position, ``4`` dangling end (before the first or
        after the last exon).

        ``trueWeightMatch`` is a ``(36, 1)`` matrix: the 6x6 match matrix
        over the alphabet ``-acgtn`` stored as a single column.  Only the
        diagonal entries for ``a, c, g, t, n`` are filled, each with the
        number of times that character occurs in the exonic part of *dna*.

    Raises:
        IndexError: If *exons* has no rows.
        AssertionError: If the labelling does not cover *dna* exactly, or if
            the exonic part of *dna* contains a character outside ``acgtn``.
    """
    numberOfExons = exons.shape[0]  # one row per exon
    if numberOfExons == 0:
        raise IndexError("exons must contain at least one row")

    sizeMatchmatrix = 6  # alphabet: - a c g t n

    # Pull the coordinates out as plain ints so that list repetition below
    # does not depend on the array's dtype.
    exonBounds = [
        (int(exons[idx, 0]), int(exons[idx, 1]))
        for idx in range(numberOfExons)
    ]
    exonSizes = [end - start for start, end in exonBounds]
    lastExonEnd = exonBounds[-1][1]

    # Everything in front of the first exon is a dangling end.
    SpliceAlign = [4] * exonBounds[0][0]

    for idx, (exonStart, exonEnd) in enumerate(exonBounds):
        SpliceAlign.extend([0] * (exonEnd - exonStart))

        if idx < numberOfExons - 1:
            # Intron between this exon and the next one: donor position,
            # interior, acceptor position.
            intronLength = exonBounds[idx + 1][0] - exonEnd
            SpliceAlign.append(1)
            SpliceAlign.extend([3] * (intronLength - 2))
            SpliceAlign.append(2)

    # Everything behind the last exon is a dangling end as well.  With
    # 0-based, half-open coordinates the vector built so far has exactly
    # ``lastExonEnd`` entries, so the remainder is ``len(dna) - lastExonEnd``.
    if len(dna) > lastExonEnd:
        SpliceAlign.extend([4] * (len(dna) - lastExonEnd))

    assert len(SpliceAlign) == len(dna), (
        "splice labelling has %d entries but dna has %d"
        % (len(SpliceAlign), len(dna))
    )

    # The EST is the concatenation of all exonic nucleotides.
    est = [base for base, label in zip(dna, SpliceAlign) if label == 0]

    totalESTLength = sum(exonSizes)
    assert totalESTLength == len(est), (
        "exon sizes sum to %d but %d exonic positions were labelled"
        % (totalESTLength, len(est))
    )

    # Count the occurrences of a, c, g, t, n (in this order).  The sixth
    # slot of numChar is never read below and stays zero.
    numChar = [0] * sizeMatchmatrix
    charSlot = {'a': 0, 'c': 1, 'g': 2, 't': 3, 'n': 4}
    for base in est:
        slot = charSlot.get(base)
        if slot is not None:
            numChar[slot] += 1

    totalNumChar = sum(numChar)
    assert totalNumChar == len(est), (
        "exonic sequence contains characters outside 'acgtn'"
    )

    # Write the counts onto the diagonal of the match matrix.  The matrix is
    # stored column-wise in a single column, so diagonal entry (idx, idx)
    # lives at flat index (sizeMatchmatrix + 1) * idx; idx 0 is the gap
    # symbol and is left at zero.
    trueWeightMatch = zeros((sizeMatchmatrix * sizeMatchmatrix, 1))
    for idx in range(1, sizeMatchmatrix):
        trueWeightMatch[(sizeMatchmatrix + 1) * idx] = numChar[idx - 1]

    return SpliceAlign, trueWeightMatch