+ got rid of some legacy code
[qpalma.git] / qpalma / computeSpliceWeights.py
1 # This program is free software; you can redistribute it and/or modify
2 # it under the terms of the GNU General Public License as published by
3 # the Free Software Foundation; either version 2 of the License, or
4 # (at your option) any later version.
5 #
6 # Written (W) 2008 Fabio De Bona
7 # Copyright (C) 2008 Max-Planck-Society
8
9 from numpy import inf
10 from numpy.matlib import zeros
11 import pdb
12
13 # 3 * 30 supporting points: x_1 ... x_30 => 3 * 30 parameters (1 .. 90): y_1 ... y_30
14 # piecewise linear function:
15 # take score from SVM vektor (don_supp: case 1, acc_supp: case 2) and compute length of intron: case 3
16 # these are our values x
17 #
18 # | y_1 if x <= x_1
19 # |
20 # | x_i+1 - x x - x_i
21 # f(x) = | y_i * ----------- + y_i+1 * ----------- if x_i <= x <= x_i+1
22 # | x_i+1 - x_i x_i+1 - x_i
23 # |
24 # | y_30 if x_n <= x
25 #
26 # y_i and y_i+1 parameters, so the fractions are saved in the weight vectors!
27
28 def calculateWeights(plf, scores):
29 currentWeight = zeros((len(plf.limits),1))
30
31 for k in range(len(scores)):
32 value = scores[k]
33 Lower = len([elem for elem in plf.limits if elem <= value])
34
35 if Lower == 0:
36 currentWeight[0] += 1
37 elif Lower == len(plf.limits):
38 currentWeight[-1] += 1
39 else:
40 # because we count from 0 in python
41 Lower -= 1
42 Upper = Lower+1 ; # x-werte bleiben fest
43 #print value,Lower,Upper
44 weightup = 1.0*(value - plf.limits[Lower]) / (plf.limits[Upper] - plf.limits[Lower])
45 weightlow = 1.0*(plf.limits[Upper] - value) / (plf.limits[Upper] - plf.limits[Lower])
46 currentWeight[Upper] = currentWeight[Upper] + weightup
47 currentWeight[Lower] = currentWeight[Lower] + weightlow
48
49 #print plf.limits[Lower],plf.limits[Upper]
50 #print weightup,weightlow,currentWeight[Upper],currentWeight[Lower]
51
52 return currentWeight
53
54 def calculatePlif(plf, scores):
55 plifed_scores=[] ;
56
57 for k in range(len(scores)):
58 value = scores[k]
59
60 Lower = 0
61 for elem in plf.limits:
62 if elem<=value:
63 Lower+=1
64
65 if Lower == 0:
66 plifed_scores.append(plf.penalties[0])
67 elif Lower == len(plf.limits):
68 plifed_scores.append(plf.penalties[-1])
69 else:
70 Lower -= 1
71 Upper = Lower+1 ;
72 weightup = 1.0*(value - plf.limits[Lower]) / (plf.limits[Upper] - plf.limits[Lower])
73 weightlow = 1.0*(plf.limits[Upper] - value) / (plf.limits[Upper] - plf.limits[Lower])
74
75 plifed_scores.append( plf.penalties[Upper]*weightup + plf.penalties[Lower]*weightlow )
76
77 return plifed_scores
78
79 def computeSpliceWeights(d, a, h, SpliceAlign, don_supp, acc_supp):
80 ####################################################################################
81 # 1. Donor: In don_supp stehen Werte der SVM., in SpliceAlign die Einsen
82 ####################################################################################
83
84 # Picke die Positionen raus, an denen eine Donorstelle ist
85 try:
86 DonorScores = [elem for pos,elem in enumerate(don_supp) if SpliceAlign[pos] == 1]
87 assert not ( -inf in DonorScores )
88 except:
89 print 'Error'
90 pdb.set_trace()
91
92 #print 'donor'
93 weightDon = calculateWeights(d,DonorScores)
94
95 ####################################################################################
96 # 2. Acceptor: In acc_supp stehen Werte der SVM., in SpliceAlign die Einsen
97 ####################################################################################
98
99 #Den Vektor Acceptorstellen durchgehen und die Gewichtsvektoren belasten:
100 try:
101 AcceptorScores = [elem for pos,elem in enumerate(acc_supp) if pos > 0 and SpliceAlign[pos] == 2]
102 assert not ( -inf in AcceptorScores )
103 except:
104 print 'Error'
105 pdb.set_trace()
106
107 #print 'acceptor'
108 weightAcc = calculateWeights(a,AcceptorScores)
109
110 ####################################################################################
111 # 3. Intron length: SpliceAlign: Gaps zaehlen und auf Gapgewichte addieren
112 ####################################################################################
113
114 intron_starts = []
115 intron_ends = []
116 for pos,elem in enumerate(SpliceAlign):
117 if elem == 1:
118 intron_starts.append(pos)
119
120 if elem == 2:
121 intron_ends.append(pos)
122
123 assert len(intron_starts) == len(intron_ends)
124
125 for i in range(len(intron_starts)):
126 assert intron_starts[i] < intron_ends[i]
127
128 values = [0.0]*len(intron_starts)
129 for pos in range(len(intron_starts)):
130 values[pos] = intron_ends[pos] - intron_starts[pos] + 1
131
132 #print 'introns found: ' + str(values)
133 weightIntron = calculateWeights(h,values)
134
135 return weightDon, weightAcc, weightIntron