+ fixed a bug in the C++ interface
[qpalma.git] / standalone / palma / model.py
1 #
2 # This program is free software; you can redistribute it and/or modify
3 # it under the terms of the GNU General Public License as published by
4 # the Free Software Foundation; either version 2 of the License, or
5 # (at your option) any later version.
6 #
7 # Written (W) 2006-2007 Soeren Sonnenburg
8 # Written (W) 2007 Gunnar Raetsch
9 # Copyright (C) 2007 Fraunhofer Institute FIRST and Max-Planck-Society
10 #
11
12 import sys
13 import numpy
14 from numpy import mat,array,chararray,inf
15
16 class model(object):
17 #acceptor
18 acceptor_bins=None
19 acceptor_limits=None
20 acceptor_penalties=None
21
22 acceptor_splice_b=None
23 acceptor_splice_order=None
24 acceptor_splice_window_left=None
25 acceptor_splice_window_right=None
26 acceptor_splice_alphas=None
27 acceptor_splice_svs=None
28
29 #donor
30 donor_bins=None
31 donor_limits=None
32 donor_penalties=None
33
34 donor_splice_b=None
35 donor_splice_order=None
36 #donor_splice_use_gc=None
37 donor_splice_window_left=None
38 donor_splice_window_right=None
39 donor_splice_alphas=None
40 donor_splice_svs=None
41
42 # intron length
43 intron_len_bins=None
44 intron_len_limits=None
45 intron_len_penalties=None
46 intron_len_min=None
47 intron_len_max=None
48 intron_len_transform=None
49
50 gene_len_max = None
51
52 # substitution matrix
53 substitution_matrix=None
54
55 def parse_file(file):
56 m=model()
57
58 sys.stdout.write("loading model file"); sys.stdout.flush()
59
60 l=file.readline();
61
62 if l != '%palma definition file version: 1.0\n':
63 sys.stderr.write("\nfile not a palma definition file\n")
64 print l
65 return None
66
67 while l:
68 if not ( l.startswith('%') or l.startswith('\n') ): # comment
69 #print l[0:100]
70
71 #acceptor
72 if m.acceptor_bins is None: m.acceptor_bins=parse_value(l, 'acceptor_bins')
73 if m.acceptor_limits is None: m.acceptor_limits=parse_vector(l, file, 'acceptor_limits')
74 if m.acceptor_penalties is None: m.acceptor_penalties=parse_vector(l, file, 'acceptor_penalties') #DEAD
75
76 if m.acceptor_splice_b is None: m.acceptor_splice_b=parse_value(l, 'acceptor_splice_b')
77 if m.acceptor_splice_order is None: m.acceptor_splice_order=parse_value(l, 'acceptor_splice_order')
78 if m.acceptor_splice_window_left is None: m.acceptor_splice_window_left=parse_value(l, 'acceptor_splice_window_left')
79 if m.acceptor_splice_window_right is None: m.acceptor_splice_window_right=parse_value(l, 'acceptor_splice_window_right')
80 if m.acceptor_splice_alphas is None: m.acceptor_splice_alphas=parse_vector(l, file, 'acceptor_splice_alphas')
81 if m.acceptor_splice_svs is None: m.acceptor_splice_svs=parse_string(l, file, 'acceptor_splice_svs')
82
83 #donor
84 if m.donor_bins is None: m.donor_bins=parse_value(l, 'donor_bins')
85 if m.donor_limits is None: m.donor_limits=parse_vector(l, file, 'donor_limits')
86 if m.donor_penalties is None: m.donor_penalties=parse_vector(l, file, 'donor_penalties') #DEAD
87
88 if m.donor_splice_b is None: m.donor_splice_b=parse_value(l, 'donor_splice_b')
89 if m.donor_splice_order is None: m.donor_splice_order=parse_value(l, 'donor_splice_order')
90 #if m.donor_splice_use_gc is None: m.donor_splice_use_gc=parse_value(l, 'donor_splice_use_gc')
91 if m.donor_splice_window_left is None: m.donor_splice_window_left=parse_value(l, 'donor_splice_window_left')
92 if m.donor_splice_window_right is None: m.donor_splice_window_right=parse_value(l, 'donor_splice_window_right')
93 if m.donor_splice_alphas is None: m.donor_splice_alphas=parse_vector(l, file, 'donor_splice_alphas')
94 if m.donor_splice_svs is None: m.donor_splice_svs=parse_string(l, file, 'donor_splice_svs')
95
96
97 # intron length
98 if m.intron_len_bins is None: m.intron_len_bins=parse_value(l, 'intron_len_bins')
99 if m.intron_len_limits is None: m.intron_len_limits=parse_vector(l, file, 'intron_len_limits')
100 if m.intron_len_penalties is None: m.intron_len_penalties=parse_vector(l, file, 'intron_len_penalties')
101 if m.intron_len_min is None: m.intron_len_min=parse_value(l, 'intron_len_min')
102 if m.intron_len_max is None: m.intron_len_max=parse_value(l, 'intron_len_max')
103 if m.intron_len_transform is None: m.intron_len_transform=parse_value(l, 'intron_len_transform')
104
105 # gene length
106 if m.gene_len_max is None: m.gene_len_max=parse_value(l, 'gene_len_max')
107
108 if m.substitution_matrix is None: m.substitution_matrix=parse_vector(l, file, 'substitution_matrix')
109
110 l=file.readline()
111
112 sys.stderr.write('done\n')
113 return m
114
115 def parse_value(line, name):
116 if (line.startswith(name)):
117 #print 'found ' + name
118 sys.stdout.write('.'); sys.stdout.flush()
119 str = line[line.find('=')+1:-1] ;
120 if str[0] == "'":
121 return str[1:-1]
122 else:
123 #print float(str)
124 return float(str)
125 else:
126 return None
127
128 def parse_vector(line, file, name):
129 mat = parse_matrix(line, file, name)
130 if mat is None:
131 return mat
132 else:
133 mat = numpy.array(mat).flatten()
134 return mat
135
136 def parse_matrix(line, file, name):
137 if (line.startswith(name)):
138 sys.stdout.write('.'); sys.stdout.flush()
139 if line.find(']') < 0:
140 l=''
141 while l is not None and l.find(']') < 0:
142 line+=l
143 l=file.readline()
144 if l is not None and l.find(']') >= 0:
145 line+=l
146
147 if line.find(']') < 0:
148 sys.stderr.write("matrix `" + name + "' ended without ']'\n")
149 return None
150 else:
151 return mat(line[line.find('['):line.find(']')+1])
152 else:
153 return None
154
155 def parse_string(line, file, name):
156 if (line.startswith(name)):
157 sys.stdout.write('.'); sys.stdout.flush()
158 l=''
159 lines=[]
160 while l is not None and l.find(']') < 0:
161 if l:
162 lines+=[list(l[:-1])]
163 l=file.readline()
164
165 if l.find(']') < 0:
166 sys.stderr.write("string ended without ']'\n")
167 return None
168 else:
169 #seqlen=len(lines[0])
170 num=len(lines)
171 #trdat = chararray((seqlen,num),1,order='FORTRAN')
172 #for i in xrange(num):
173 # trdat[:,i]=lines[i]
174 trdat = num*[[]]
175 for idx,example in enumerate(lines):
176 trdat[idx] = ''.join(example)
177 return trdat
178 else:
179 return None
180
181 if __name__ == '__main__':
182 import bz2
183 import sys
184 #import hotshot, hotshot.stats
185
186 def load():
187 #f=bz2.BZ2File('../../splicing/msplicer/standalone/python/data/msplicer_elegansWS120_gc=1_orf=0.dat.bz2');
188 filename='test.dat.bz2'
189 if filename.endswith('.bz2'):
190 f=bz2.BZ2File(filename);
191 else:
192 f=file(filename);
193
194 m=parse_file(f);
195
196 print m.acceptor_bins is None
197 print m.acceptor_limits is None
198 print m.acceptor_penalties is None
199
200 print m.acceptor_splice_b is None
201 print m.acceptor_splice_order is None
202 print m.acceptor_splice_window_left is None
203 print m.acceptor_splice_window_right is None
204 print m.acceptor_splice_alphas is None
205 print m.acceptor_splice_svs is None
206
207 print m.donor_bins is None
208 print m.donor_limits is None
209 print m.donor_penalties is None
210
211 print m.donor_splice_b is None
212 print m.donor_splice_order is None
213 #print m.donor_splice_use_gc is None
214 print m.donor_splice_window_left is None
215 print m.donor_splice_window_right is None
216 print m.donor_splice_alphas is None
217 print m.donor_splice_svs is None
218 print m.intron_len_bins is None
219 print m.intron_len_limits is None
220 print m.intron_len_penalties is None
221 print m.intron_len_min is None
222 print m.intron_len_max is None
223 print m.intron_len_transform is None
224
225 print m.substitution_matrix is None
226
227 print m.intron_len_transform
228 print m.substitution_matrix
229
230 load()
231
232 #prof = hotshot.Profile("model.prof")
233 #benchtime = prof.runcall(load)
234 #prof.close()
235 #stats = hotshot.stats.load("model.prof")
236 #stats.strip_dirs()
237 #stats.sort_stats('time', 'calls')
238 #stats.print_stats(20)