+ added c code from the palma project
[qpalma.git] / python / computeSpliceWeights.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from numpy import inf
5 from numpy.matlib import zeros
6
7 def calculateWeights(plf, scores):
8 """
9 3 * 30 supporting points: x_1 ... x_30
10 => 3 * 30 parameters (1 .. 90): y_1 ... y_30
11 piecewise linear function:
12
13 take score from SVM vektor (don_supp: case 1, acc_supp: case 2) and compute length of intron: case 3
14 these are our values x
15
16 | y_1 if x <= x_1
17 |
18 | x_i+1 - x x - x_i
19 f(x) = | y_i * ----------- + y_i+1 * ----------- if x_i <= x <= x_i+1
20 | x_i+1 - x_i x_i+1 - x_i
21 |
22 | y_30 if x_n <= x
23
24 y_i and y_i+1 parameters, so the fractions are saved in the weight vectors!
25 """
26
27 currentWeight = zeros((30,1))
28
29 for k in range(len(scores)):
30 value = scores[k]
31
32 Lower = 0
33 Lower = len([elem for elem in plf.limits if elem <= value])
34 Upper = Lower+1 ; # x-werte bleiben fest
35
36 if Lower == 0:
37 currentWeight[0] = currentWeight[0] + 1;
38 elif Lower == len(plf.limits):
39 currentWeight[-1] = currentWeight[-1] + 1;
40 else:
41 weightup = (value - plf.limits[Lower]) / (plf.limits[Upper] - plf.limits[Lower]) ;
42 weightlow = (plf.limits[Upper] - value) / (plf.limits[Upper] - plf.limits[Lower]) ;
43 currentWeight[Upper] = currentWeight[Upper] + weightup;
44 currentWeight[Lower] = currentWeight[Lower] + weightlow;
45
46 return currentWeight
47
48
49 def computeSpliceWeights(d, a, h, SpliceAlign, don_supp, acc_supp):
50 #assert(isempty(d.transform))
51 #assert(isempty(a.transform))
52 #assert(isempty(h.transform))
53
54 ####################################################################################
55 # 1. Donor: In don_supp stehen Werte der SVM., in SpliceAlign die Einsen
56 ####################################################################################
57
58 # Picke die Positionen raus, an denen eine Donorstelle ist
59 #DonorScores = don_supp(find(SpliceAlign==1)) ;
60 #assert(all(DonorScores~=-Inf)) ;
61
62 DonorScores = [elem for pos,elem in enumerate(don_supp) if SpliceAlign[pos] == 1]
63 assert not ( -inf in DonorScores )
64
65 weightDon = calculateWeights(d,DonorScores)
66
67 ####################################################################################
68 # 2. Acceptor: In acc_supp stehen Werte der SVM., in SpliceAlign die Einsen
69 ####################################################################################
70
71 #AcceptorScores = acc_supp(find(SpliceAlign == 2)+1) ;
72 #assert(all(AcceptorScores~=-Inf)) ;
73
74 #Den Vektor Acceptorstellen durchgehen und die Gewichtsvektoren belasten:
75 AcceptorScores = [elem for pos,elem in enumerate(acc_supp) if SpliceAlign[pos] == 2]
76 #assert not ( -inf in AcceptorScores )
77
78 weightAcc = calculateWeights(a,AcceptorScores)
79
80 ####################################################################################
81 # 3. Intron length: SpliceAlign: Gaps zaehlen und auf Gapgewichte addieren
82 ####################################################################################
83
84 #intron_starts = find(SpliceAlign==1) ;
85 #intron_ends = find(SpliceAlign==2) ;
86
87 #%assert: introns do not overlap
88 #assert(length(intron_starts) == length(intron_ends)) ;
89 #assert(all(intron_starts < intron_ends)) ;
90 #assert(all(intron_ends(1:end-1) < intron_starts(2:end))) ;
91
92 #weightIntron = calculateWeights(h,AcceptorScores)
93 weightIntron = [1,2,3]
94
95 return weightDon, weightAcc, weightIntron