+ removed accidently checked in vim swap files
[qpalma.git] / python / computeSpliceWeights.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from numpy import inf
5 from numpy.matlib import zeros
6
7 # 3 * 30 supporting points: x_1 ... x_30
8 # => 3 * 30 parameters (1 .. 90): y_1 ... y_30
9 # piecewise linear function:
10 #
11 # take score from SVM vektor (don_supp: case 1, acc_supp: case 2) and compute length of intron: case 3
12 # these are our values x
13 #
14 # | y_1 if x <= x_1
15 # |
16 # | x_i+1 - x x - x_i
17 # f(x) = | y_i * ----------- + y_i+1 * ----------- if x_i <= x <= x_i+1
18 # | x_i+1 - x_i x_i+1 - x_i
19 # |
20 # | y_30 if x_n <= x
21 #
22 # y_i and y_i+1 parameters, so the fractions are saved in the weight vectors!
23
24 def calculateWeights(plf, scores):
25 currentWeight = zeros((30,1))
26
27 for k in range(len(scores)):
28 value = scores[k]
29 Lower = len([elem for elem in plf.limits if elem <= value])
30 Upper = Lower+1 ; # x-werte bleiben fest
31
32 #print Lower,Upper,len(plf.limits)
33
34 if Lower == 0:
35 currentWeight[0] += 1
36 elif Lower == len(plf.limits):
37 currentWeight[-1] += 1
38 else:
39 weightup = 1.0*(value - plf.limits[Lower]) / (plf.limits[Upper] - plf.limits[Lower])
40 weightlow = 1.0*(plf.limits[Upper] - value) / (plf.limits[Upper] - plf.limits[Lower])
41 currentWeight[Upper] = currentWeight[Upper] + weightup
42 currentWeight[Lower] = currentWeight[Lower] + weightlow
43
44 #print value,plf.limits[Lower],plf.limits[Upper]
45 #print weightup,weightlow,currentWeight[Upper],currentWeight[Lower]
46
47 return currentWeight
48
49 def computeSpliceWeights(d, a, h, SpliceAlign, don_supp, acc_supp):
50 ####################################################################################
51 # 1. Donor: In don_supp stehen Werte der SVM., in SpliceAlign die Einsen
52 ####################################################################################
53
54 # Picke die Positionen raus, an denen eine Donorstelle ist
55 DonorScores = [elem for pos,elem in enumerate(don_supp) if SpliceAlign[pos] == 1]
56 assert not ( -inf in DonorScores )
57
58 #print 'donor'
59 weightDon = calculateWeights(d,DonorScores)
60
61 ####################################################################################
62 # 2. Acceptor: In acc_supp stehen Werte der SVM., in SpliceAlign die Einsen
63 ####################################################################################
64
65 #Den Vektor Acceptorstellen durchgehen und die Gewichtsvektoren belasten:
66 AcceptorScores = [elem for pos,elem in enumerate(acc_supp) if pos > 0 and SpliceAlign[pos-1] == 2]
67 assert not ( -inf in AcceptorScores )
68
69 #print 'acceptor'
70 weightAcc = calculateWeights(a,AcceptorScores)
71
72 ####################################################################################
73 # 3. Intron length: SpliceAlign: Gaps zaehlen und auf Gapgewichte addieren
74 ####################################################################################
75
76 intron_starts = []
77 intron_ends = []
78 for pos,elem in enumerate(SpliceAlign):
79 if elem == 1:
80 intron_starts.append(pos)
81
82 if elem == 2:
83 intron_ends.append(pos)
84
85 assert len(intron_starts) == len(intron_ends)
86
87 for i in range(len(intron_starts)):
88 assert intron_starts[i] < intron_ends[i]
89
90 values = [0.0]*len(intron_starts)
91 for pos in range(len(intron_starts)):
92 values[pos] = intron_ends[pos] - intron_starts[pos] + 1
93
94 weightIntron = calculateWeights(h,values)
95
96 return weightDon, weightAcc, weightIntron