2 # -*- coding: utf-8 -*-
4 from numpy
.matlib
import zeros
def computeSpliceAlign(dna, exons):
    """Label every position of ``dna`` and count the exonic nucleotides.

    Exon coordinates are half-open: column 0 of ``exons`` is the first
    position of the exon and column 1 is the position one to the right of
    its last nucleotide.  It follows that an intron starts at the end
    coordinate of the preceding exon and stops at the start coordinate of
    the next exon (the splice-site end is likewise noted one to the right
    of the actual end).

    Layout sketch (A = exon start, E = exon end)::

        cccccA1cccccE1cccccA2 ... AmcccccEmcccccccc
        cccccXXcccccA1cccccE1 ... Em-1cccXXcccccccc

    Args:
        dna: Sequence of single nucleotide characters (e.g. a ``str``).
            Characters at exonic positions must be one of the lowercase
            letters ``a``, ``c``, ``g``, ``t``, ``n``.
        exons: Two-dimensional integer array or matrix with one row per
            exon, ``exons[i, 0]`` being the start and ``exons[i, 1]`` the
            (exclusive) end of exon ``i``.  Any number of exons >= 1 is
            accepted.

    Returns:
        A tuple ``(SpliceAlign, trueWeightMatch)``.

        ``SpliceAlign`` is a list with one integer label per position of
        ``dna``: 0 = exon, 1 = donor position, 2 = acceptor position,
        3 = intron, 4 = dangling end before the first / after the last exon.

        ``trueWeightMatch`` is a ``(36, 1)`` matrix holding a 6x6 match
        matrix over the alphabet ``-acgtn`` stored column-wise.  Only the
        diagonal entries for ``a, c, g, t, n`` are filled, each with the
        number of occurrences of that letter among the exonic positions.

    Raises:
        ValueError: If ``exons`` has no rows, if the labelling implied by
            ``exons`` does not have exactly ``len(dna)`` entries, if the
            exon sizes are inconsistent with the labelling, or if an exonic
            position holds a character outside ``acgtn``.
    """
    num_exons = exons.shape[0]  # one row per exon
    if num_exons == 0:
        raise ValueError("exons must contain at least one row")

    # Convert to plain ints once so list repetition below does not depend on
    # how numpy scalars interact with Python lists.
    bounds = [(int(exons[row, 0]), int(exons[row, 1]))
              for row in range(num_exons)]

    size_matchmatrix = 6  # alphabet: - a c g t n

    # Label vector: 1 = donor pos, 3 = intron, 2 = acceptor pos, 0 = exon,
    # 4 = dangling end.
    splice_align = [4] * bounds[0][0]
    last_row = num_exons - 1
    for row, (start, stop) in enumerate(bounds):
        splice_align.extend([0] * (stop - start))
        if row < last_row:
            intron_length = bounds[row + 1][0] - stop
            # First intron position is the donor, last one the acceptor.
            splice_align.extend([1] + [3] * (intron_length - 2) + [2])

    # A non-positive repeat count yields an empty list, so no guard is needed
    # when the last exon already reaches the end of the sequence.
    splice_align.extend([4] * (len(dna) - bounds[-1][1]))

    if len(splice_align) != len(dna):
        raise ValueError(
            "exon coordinates describe %d positions but dna has %d"
            % (len(splice_align), len(dna)))

    # The underlying EST consists of the exonic nucleotides only.
    est = [base for base, label in zip(dna, splice_align) if label == 0]

    total_est_length = sum(stop - start for start, stop in bounds)
    if total_est_length != len(est):
        raise ValueError(
            "exon sizes sum to %d but %d exonic positions were labelled"
            % (total_est_length, len(est)))

    # Count the occurrences of a, c, g, t, n in this order.
    # NOTE(review): the counting is case-sensitive (lowercase only); confirm
    # that callers always pass lowercase sequences.
    slot_of = {"a": 0, "c": 1, "g": 2, "t": 3, "n": 4}
    num_char = [0] * size_matchmatrix
    for base in est:
        if base not in slot_of:
            raise ValueError(
                "unexpected character %r in exonic sequence" % (base,))
        num_char[slot_of[base]] += 1

    # Ground-truth score matrix, stored column-wise: entry (idx, idx) of the
    # 6x6 matrix lives at flat index (size + 1) * idx.  Index 0 is the gap
    # symbol '-' and stays zero.
    true_weight_match = zeros((size_matchmatrix * size_matchmatrix, 1))
    for idx in range(1, size_matchmatrix):
        true_weight_match[(size_matchmatrix + 1) * idx] = num_char[idx - 1]

    return splice_align, true_weight_match