7d82c0c8ced3a65f204709cd46f1db61b1b07938
[qpalma.git] / python / Configuration.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import numpy.matlib
5
6 fixedParamQ = numpy.matlib.mat(
7 [[ 0.62870709], [ 0.7012026 ], [ 0.60236784],
8 [ 0.51316259], [ 0.20220814], [ 0.70324863], [ 0.37218684], [ 0.82178927],
9 [ 0.60394866], [ 0.70371272], [ 0.07548074], [ 0.63412803], [ 0.97442266],
10 [ 0.13216791], [ 0.71041168], [ 0.2093887 ], [ 0.35227344], [ 0.3405142 ],
11 [ 0.69677236], [ 0.41673747], [ 0.564245 ], [ 0.37613432], [ 0.88805642],
12 [ 0.69677236], [ 0.41673747], [ 0.564245 ], [ 0.37613432], [ 0.88805642],
13 [ 0.69677236], [ 0.41673747], [ 0.564245 ], [ 0.37613432], [ 0.88805642],
14 [ 0.88691608], [ 0.69476752], [ 0.81659504], [ 0.17801859], [ 0.71048235],
15 [ 0.08188783], [ 0.54884803], [ 0.84039558], [ 0.6982093 ], [ 0.41686176],
16 [ 0.38568873], [ 0.29401347], [ 0.12704074], [ 0.30640858], [ 0.89578031],
17 [ 0.84621571], [ 0.11783439], [ 0.0944695 ], [ 0.34081575], [ 0.44157643],
18 [ 0.77847185], [ 0.04283567], [ 0.45107823], [ 0.89789891], [ 0.41045519],
19 [ 0.49073531], [ 0.29727627], [ 0.94711483], [ 0.24898204], [ 0.26181212],
20 [ 0.71760957], [ 0.60326883], [ 0.80887576], [ 0.09448718], [ 0.88064525],
21 [ 0.84317654], [ 0.48893703], [ 0.24847021], [ 0.84203596], [ 0.34104156],
22 [ 0.75604701], [ 0.91703057], [ 0.69325475], [ 0.61276969], [ 0.16335226],
23 [ 0.4684374 ], [ 0.16553371], [ 0.79594434], [ 0.6440283 ], [ 0.80922237],
24 [ 0.5349296 ], [ 0.31924316], [ 0.10960695], [ 0.40151062], [ 0.50473641],
25 [ 0.14812671], [ 0.73523169], [ 0.35141625], [ 0.80364238], [ 0.02128181],
26 [ 0.0061226 ], [ 0.34541924], [ 0.07694485], [ 0.05551339], [ 0.23087636],
27 [ 0.87016395], [ 0.31682377], [ 0.27375113], [ 0.72226332], [ 0.62914149],
28 [ 0.59236012], [ 0.2070238 ], [ 0.52390942], [ 0.11894098], [ 0.55725917],
29 [ 0.72706009], [ 0.087196 ], [ 0.04745082], [ 0.95636492], [ 0.31524576],
30 [ 0.79685218], [ 0.80386771], [ 0.70942604], [ 0.82869417], [ 0.26906569],
31 [ 0.51848039], [ 0.64169354], [ 0.07114973], [ 0.39249454], [ 0.07002803],
32 [ 0.94667567], [ 0.02252752], [ 0.01039039], [ 0.5721312 ], [ 0.06065969],
33 [ 0.88691608], [ 0.69476752], [ 0.81659504], [ 0.17801859], [ 0.71048235],
34 [ 0.88691608], [ 0.69476752], [ 0.81659504], [ 0.17801859], [ 0.71048235],
35 [ 0.08188783], [ 0.54884803], [ 0.84039558], [ 0.6982093 ], [ 0.41686176],
36 [ 0.38568873], [ 0.29401347], [ 0.12704074], [ 0.30640858], [ 0.89578031],
37 [ 0.84621571], [ 0.11783439], [ 0.0944695 ], [ 0.34081575], [ 0.44157643],
38 [ 0.77847185], [ 0.04283567], [ 0.45107823], [ 0.89789891], [ 0.41045519],
39 [ 0.49073531], [ 0.29727627], [ 0.94711483], [ 0.24898204], [ 0.26181212],
40 [ 0.71760957], [ 0.60326883], [ 0.80887576], [ 0.09448718], [ 0.88064525],
41 [ 0.84317654], [ 0.48893703], [ 0.24847021], [ 0.84203596], [ 0.34104156],
42 [ 0.75604701], [ 0.91703057], [ 0.69325475], [ 0.61276969], [ 0.16335226],
43 [ 0.4684374 ], [ 0.16553371], [ 0.79594434], [ 0.6440283 ], [ 0.80922237],
44 [ 0.5349296 ], [ 0.31924316], [ 0.10960695], [ 0.40151062], [ 0.50473641],
45 [ 0.14812671], [ 0.73523169], [ 0.35141625], [ 0.80364238], [ 0.02128181],
46 [ 0.0061226 ], [ 0.34541924], [ 0.07694485], [ 0.05551339], [ 0.23087636],
47 [ 0.87016395], [ 0.31682377], [ 0.27375113], [ 0.72226332], [ 0.62914149],
48 [ 0.59236012], [ 0.2070238 ], [ 0.52390942], [ 0.11894098], [ 0.55725917],
49 [ 0.72706009], [ 0.087196 ], [ 0.04745082], [ 0.95636492], [ 0.31524576],
50 [ 0.79685218], [ 0.80386771], [ 0.70942604], [ 0.82869417], [ 0.26906569],
51 [ 0.51848039], [ 0.64169354], [ 0.07114973], [ 0.39249454], [ 0.07002803],
52 [ 0.94667567], [ 0.02252752], [ 0.01039039], [ 0.5721312 ], [ 0.06065969],
53 [ 0.88691608], [ 0.69476752], [ 0.81659504], [ 0.17801859], [ 0.71048235],
54 [ 0.08188783], [ 0.54884803], [ 0.84039558], [ 0.6982093 ], [ 0.41686176],
55 [ 0.38568873], [ 0.29401347], [ 0.12704074], [ 0.30640858], [ 0.89578031],
56 [ 0.84621571], [ 0.11783439], [ 0.0944695 ], [ 0.34081575], [ 0.44157643],
57 [ 0.84621571], [ 0.11783439], [ 0.0944695 ], [ 0.34081575], [ 0.44157643],
58 [ 0.77847185], [ 0.04283567], [ 0.45107823], [ 0.89789891], [ 0.41045519],
59 [ 0.49073531], [ 0.29727627], [ 0.94711483], [ 0.24898204], [ 0.26181212],
60 [ 0.71760957], [ 0.60326883], [ 0.80887576], [ 0.09448718], [ 0.88064525],
61 [ 0.84317654], [ 0.48893703], [ 0.24847021], [ 0.84203596], [ 0.34104156],
62 [ 0.75604701], [ 0.91703057], [ 0.69325475], [ 0.61276969], [ 0.16335226],
63 [ 0.4684374 ], [ 0.16553371], [ 0.79594434], [ 0.6440283 ], [ 0.80922237],
64 [ 0.5349296 ], [ 0.31924316], [ 0.10960695], [ 0.40151062], [ 0.50473641],
65 [ 0.14812671], [ 0.73523169], [ 0.35141625], [ 0.80364238], [ 0.02128181],
66 [ 0.0061226 ], [ 0.34541924], [ 0.07694485], [ 0.05551339], [ 0.23087636],
67 [ 0.87016395], [ 0.31682377], [ 0.27375113], [ 0.72226332], [ 0.62914149],
68 [ 0.59236012], [ 0.2070238 ], [ 0.52390942], [ 0.11894098], [ 0.55725917],
69 [ 0.72706009], [ 0.087196 ], [ 0.04745082], [ 0.95636492], [ 0.31524576],
70 [ 0.79685218], [ 0.80386771], [ 0.70942604], [ 0.82869417], [ 0.26906569],
71 [ 0.51848039], [ 0.64169354], [ 0.07114973], [ 0.39249454], [ 0.07002803],
72 [ 0.94667567], [ 0.02252752], [ 0.01039039], [ 0.5721312 ], [ 0.06065969],
73 [ 0.69422476], [ 0.4310939 ], [ 0.03069099], [ 0.35969779], [ 0.18047331],
74 [ 0.4177651 ], [ 0.01360547], [ 0.29069319]
75 ])
76
77 fixedParam = numpy.matlib.mat([[ 0.62870709], [ 0.7012026 ], [ 0.60236784],
78 [ 0.51316259], [ 0.20220814], [ 0.70324863], [ 0.37218684], [ 0.82178927],
79 [ 0.60394866], [ 0.70371272], [ 0.07548074], [ 0.63412803], [ 0.97442266],
80 [ 0.13216791], [ 0.71041168], [ 0.2093887 ], [ 0.35227344], [ 0.3405142 ],
81 [ 0.69677236], [ 0.41673747], [ 0.564245 ], [ 0.37613432], [ 0.88805642],
82 [ 0.88691608], [ 0.69476752], [ 0.81659504], [ 0.17801859], [ 0.71048235],
83 [ 0.08188783], [ 0.54884803], [ 0.84039558], [ 0.6982093 ], [ 0.41686176],
84 [ 0.38568873], [ 0.29401347], [ 0.12704074], [ 0.30640858], [ 0.89578031],
85 [ 0.84621571], [ 0.11783439], [ 0.0944695 ], [ 0.34081575], [ 0.44157643],
86 [ 0.77847185], [ 0.04283567], [ 0.45107823], [ 0.89789891], [ 0.41045519],
87 [ 0.49073531], [ 0.29727627], [ 0.94711483], [ 0.24898204], [ 0.26181212],
88 [ 0.71760957], [ 0.60326883], [ 0.80887576], [ 0.09448718], [ 0.88064525],
89 [ 0.84317654], [ 0.48893703], [ 0.24847021], [ 0.84203596], [ 0.34104156],
90 [ 0.75604701], [ 0.91703057], [ 0.69325475], [ 0.61276969], [ 0.16335226],
91 [ 0.4684374 ], [ 0.16553371], [ 0.79594434], [ 0.6440283 ], [ 0.80922237],
92 [ 0.5349296 ], [ 0.31924316], [ 0.10960695], [ 0.40151062], [ 0.50473641],
93 [ 0.14812671], [ 0.73523169], [ 0.35141625], [ 0.80364238], [ 0.02128181],
94 [ 0.0061226 ], [ 0.34541924], [ 0.07694485], [ 0.05551339], [ 0.23087636],
95 [ 0.87016395], [ 0.31682377], [ 0.27375113], [ 0.72226332], [ 0.62914149],
96 [ 0.59236012], [ 0.2070238 ], [ 0.52390942], [ 0.11894098], [ 0.55725917],
97 [ 0.72706009], [ 0.087196 ], [ 0.04745082], [ 0.95636492], [ 0.31524576],
98 [ 0.79685218], [ 0.80386771], [ 0.70942604], [ 0.82869417], [ 0.26906569],
99 [ 0.51848039], [ 0.64169354], [ 0.07114973], [ 0.39249454], [ 0.07002803],
100 [ 0.94667567], [ 0.02252752], [ 0.01039039], [ 0.5721312 ], [ 0.06065969],
101 [ 0.69422476], [ 0.4310939 ], [ 0.03069099], [ 0.35969779], [ 0.18047331],
102 [ 0.4177651 ], [ 0.01360547], [ 0.29069319]])
103
104
105 ###########################################################
106 #
107 # The parameters for the QPalma algorithm
108 #
109 #
110
111 C = 100000.0
112
113 # 'normal' means work like Palma 'using_quality_scores' means work like Palma
114 # plus using sequencing quality scores
115
116 #mode = 'normal'
117
118 mode = 'using_quality_scores'
119
120 # When using quality scores our scoring function is defined as
121 #
122 # f: S_e x R x S -> R
123 #
124 # where S_e is {A,C,G,T,N} and S = {A,C,G,T,N,-}
125 #
126 # as opposed to a usage without quality scores when we only have
127 #
128 # f: S x S -> R
129 #
130 # The matrix of plifs is defined as follows:
131 #
132 # elem | - a c g t n
133 # -------------------------
134 # idx | 0 1 2 3 4 5
135 #
136 #
137 # dna
138 #
139 # - a c g t n
140 # a
141 # est c
142 # g
143 # t
144 # n
145 #
146 # so the index can be calculated as (estnum-1)*6 + dnanum.
147 #
148 # At ests we do not have gaps with quality scores so we look up the matchmatrix
149 # instead.
150
151 numDonSuppPoints = 30
152 numAccSuppPoints = 30
153 numLengthSuppPoints = 30
154 numQualSuppPoints = 2
155
156 min_qual = -1
157 max_qual = 40
158
159 USE_OPT = True
160
161 if mode == 'normal':
162 sizeMatchmatrix = (6,6)
163 estPlifs = 0
164 dnaPlifs = 0
165 numQualPlifs = estPlifs*dnaPlifs
166 elif mode == 'using_quality_scores':
167 sizeMatchmatrix = (6,1)
168 estPlifs = 5
169 dnaPlifs = 6
170 numQualPlifs = estPlifs*dnaPlifs
171 else:
172 assert False, 'Wrong operation mode specified'
173
174 totalQualSuppPoints = numQualPlifs*numQualSuppPoints
175
176 numFeatures = numDonSuppPoints + numAccSuppPoints\
177 + numLengthSuppPoints + sizeMatchmatrix[0]*sizeMatchmatrix[1] + totalQualSuppPoints
178
179 iter_steps = 40
180 remove_duplicate_scores = False
181 print_matrix = False
182 anzpath = 2
183
184 if mode == 'normal':
185 fixedParam = fixedParam[:numFeatures]
186 elif mode == 'using_quality_scores':
187 fixedParam = fixedParamQ[:numFeatures]
188 else:
189 assert False, 'Wrong operation mode specified'
190
191 # Check for valid parameters
192 assert numQualPlifs >= 0
193 assert numDonSuppPoints > 1
194 assert numAccSuppPoints > 1
195 assert numLengthSuppPoints > 1
196 assert numQualSuppPoints > 1