3 Functions for parsing palma parameter files,
4 to extract splice site predictors for qpalma.
10 from numpy
import mat
,array
,chararray
,inf
13 """A container for palma parameters"""
# Class-level defaults: every field listed here starts out as None.
# parse_file tests fields of these names with `is None` before assigning them.
# NOTE(review): the class header line is not visible in this view, and neither
# are several fields that parse_file assigns (acceptor_bins, acceptor_limits,
# donor_bins, donor_limits, donor_penalties, donor_splice_b, donor_splice_svs,
# intron_len_bins, intron_len_min, intron_len_max, gene_len_max) -- confirm
# that they are declared here with a None default as well.
# Acceptor fields.
18 acceptor_penalties
=None
20 acceptor_splice_b
=None
21 acceptor_splice_order
=None
22 acceptor_splice_window_left
=None
23 acceptor_splice_window_right
=None
24 acceptor_splice_alphas
=None
25 acceptor_splice_svs
=None
# Donor fields.
33 donor_splice_order
=None
34 #donor_splice_use_gc=None
35 donor_splice_window_left
=None
36 donor_splice_window_right
=None
37 donor_splice_alphas
=None
# Intron length fields.
42 intron_len_limits
=None
43 intron_len_penalties
=None
46 intron_len_transform
=None
# Substitution matrix.
51 substitution_matrix
=None
# parse_file(filename): read a palma parameter file and fill the fields of `m`.
#   filename -- path of the parameter file; it must end in '.param.bz2' and is
#               opened with bz2.BZ2File.
# "loading model file" goes to stdout; the two error messages and the final
# 'done' go to stderr.
# Every field assignment below is guarded by `is None`, so a field is written
# only while it still holds None.
# NOTE(review): the lines that create `m`, read `l` from `fileptr`, loop over
# the file and return the result are not visible in this view -- confirm them
# against the full file before changing anything here.
# NOTE(review): no close() of `fileptr` is visible in this function.
55 def parse_file(filename
):
# Only names ending in '.param.bz2' are opened.
56 if filename
.endswith('.param.bz2'):
57 fileptr
=bz2
.BZ2File(filename
);
# Message for any other file name (presumably the else branch -- TODO confirm).
59 sys
.stderr
.write('Expected palma parameter file with ending param.bz2\n')
62 sys
.stdout
.write("loading model file"); sys
.stdout
.flush()
# `l` must equal the version header exactly, trailing newline included.
66 if l
!= '%palma definition file version: 1.0\n':
67 sys
.stderr
.write("\nfile not a palma definition file\n")
# Lines that start with '%' or with a newline are not handed to the parsers.
72 if not ( l
.startswith('%') or l
.startswith('\n') ): # comment
# Acceptor fields.
75 if m
.acceptor_bins
is None: m
.acceptor_bins
=parse_value(l
, 'acceptor_bins')
76 if m
.acceptor_limits
is None: m
.acceptor_limits
=parse_vector(l
, fileptr
, 'acceptor_limits')
77 if m
.acceptor_penalties
is None: m
.acceptor_penalties
=parse_vector(l
, fileptr
, 'acceptor_penalties') #DEAD
79 if m
.acceptor_splice_b
is None: m
.acceptor_splice_b
=parse_value(l
, 'acceptor_splice_b')
80 if m
.acceptor_splice_order
is None: m
.acceptor_splice_order
=parse_value(l
, 'acceptor_splice_order')
81 if m
.acceptor_splice_window_left
is None: m
.acceptor_splice_window_left
=parse_value(l
, 'acceptor_splice_window_left')
82 if m
.acceptor_splice_window_right
is None: m
.acceptor_splice_window_right
=parse_value(l
, 'acceptor_splice_window_right')
83 if m
.acceptor_splice_alphas
is None: m
.acceptor_splice_alphas
=parse_vector(l
, fileptr
, 'acceptor_splice_alphas')
84 if m
.acceptor_splice_svs
is None: m
.acceptor_splice_svs
=parse_string(l
, fileptr
, 'acceptor_splice_svs')
# Donor fields.
87 if m
.donor_bins
is None: m
.donor_bins
=parse_value(l
, 'donor_bins')
88 if m
.donor_limits
is None: m
.donor_limits
=parse_vector(l
, fileptr
, 'donor_limits')
89 if m
.donor_penalties
is None: m
.donor_penalties
=parse_vector(l
, fileptr
, 'donor_penalties') #DEAD
91 if m
.donor_splice_b
is None: m
.donor_splice_b
=parse_value(l
, 'donor_splice_b')
92 if m
.donor_splice_order
is None: m
.donor_splice_order
=parse_value(l
, 'donor_splice_order')
93 #if m.donor_splice_use_gc is None: m.donor_splice_use_gc=parse_value(l, 'donor_splice_use_gc')
94 if m
.donor_splice_window_left
is None: m
.donor_splice_window_left
=parse_value(l
, 'donor_splice_window_left')
95 if m
.donor_splice_window_right
is None: m
.donor_splice_window_right
=parse_value(l
, 'donor_splice_window_right')
96 if m
.donor_splice_alphas
is None: m
.donor_splice_alphas
=parse_vector(l
, fileptr
, 'donor_splice_alphas')
97 if m
.donor_splice_svs
is None: m
.donor_splice_svs
=parse_string(l
, fileptr
, 'donor_splice_svs')
# Intron length fields.
101 if m
.intron_len_bins
is None: m
.intron_len_bins
=parse_value(l
, 'intron_len_bins')
102 if m
.intron_len_limits
is None: m
.intron_len_limits
=parse_vector(l
, fileptr
, 'intron_len_limits')
103 if m
.intron_len_penalties
is None: m
.intron_len_penalties
=parse_vector(l
, fileptr
, 'intron_len_penalties')
104 if m
.intron_len_min
is None: m
.intron_len_min
=parse_value(l
, 'intron_len_min')
105 if m
.intron_len_max
is None: m
.intron_len_max
=parse_value(l
, 'intron_len_max')
106 if m
.intron_len_transform
is None: m
.intron_len_transform
=parse_value(l
, 'intron_len_transform')
# Gene length limit.
109 if m
.gene_len_max
is None: m
.gene_len_max
=parse_value(l
, 'gene_len_max')
# Substitution matrix; parse_vector flattens it to one dimension.
111 if m
.substitution_matrix
is None: m
.substitution_matrix
=parse_vector(l
, fileptr
, 'substitution_matrix')
# NOTE(review): the completion message goes to stderr, while the
# "loading model file" message above goes to stdout.
115 sys
.stderr
.write('done\n')
# parse_value(line, name): handle a single `name=value` line.
#   line -- one line of the parameter file
#   name -- key that `line` must start with for the line to be handled
# When `line` starts with `name`, a '.' progress mark is written to stdout and
# the text between the first '=' and the last character of the line is
# extracted.
# NOTE(review): the lines that convert and return the extracted text, and the
# result for a non-matching line, are not visible in this view.
# NOTE(review): the local name `str` shadows the builtin of the same name.
118 def parse_value(line
, name
):
119 if (line
.startswith(name
)):
120 #print 'found ' + name
121 sys
.stdout
.write('.'); sys
.stdout
.flush()
# The slice ends at -1, which drops the final character of the line
# (presumably the trailing newline -- TODO confirm).
122 str = line
[line
.find('=')+1:-1] ;
# parse_vector(line, fileptr, name): parse_matrix followed by flattening.
# The three arguments are passed unchanged to parse_matrix, and the result is
# converted with numpy.array(...).flatten() into a one-dimensional array.
# NOTE(review): inside this function the local `mat` hides the `mat` that the
# file imports from numpy.
# NOTE(review): the guard and return lines are not visible in this view --
# confirm that a None result from parse_matrix is passed through rather than
# flattened.
131 def parse_vector(line
, fileptr
, name
):
132 mat
= parse_matrix(line
, fileptr
, name
)
136 mat
= numpy
.array(mat
).flatten()
# parse_matrix(line, fileptr, name): parse a bracketed matrix literal.
#   line    -- current line of the parameter file
#   fileptr -- open file object (presumably the source of the continuation
#              lines held in `l` -- TODO confirm, the reads are not visible)
#   name    -- key that `line` must start with for the line to be handled
# When `line` starts with `name`, a '.' progress mark is written to stdout.
# The text from the first '[' through the first ']' of `line` is handed to
# `mat`, and that result is returned.
# NOTE(review): the lines that initialise `l`, extend `line` and advance `l`
# are not visible in this view, nor is the result for a non-matching line.
# NOTE(review): `mat` is the name imported from numpy; numpy.mat was removed in
# NumPy 2.0 -- confirm the NumPy version this file targets.
139 def parse_matrix(line
, fileptr
, name
):
140 if (line
.startswith(name
)):
141 sys
.stdout
.write('.'); sys
.stdout
.flush()
# No ']' on the first line: the matrix continues on further lines.
142 if line
.find(']') < 0:
# NOTE(review): if `l` comes from fileptr.readline(), end of file yields ''
# rather than None, so this condition would stay true at EOF -- confirm.
144 while l
is not None and l
.find(']') < 0:
147 if l
is not None and l
.find(']') >= 0:
# Still no ']' after collecting the continuation lines: report it on stderr.
150 if line
.find(']') < 0:
151 sys
.stderr
.write("matrix `" + name
+ "' ended without ']'\n")
154 return mat(line
[line
.find('['):line
.find(']')+1])
# parse_string(line, fileptr, name): collect a multi-line block of strings.
#   line    -- current line of the parameter file
#   fileptr -- open file object (presumably the source of the lines held in
#              `l` -- TODO confirm, the reads are not visible)
#   name    -- key that `line` must start with for the line to be handled
# When `line` starts with `name`, a '.' progress mark is written to stdout.
# While `l` contains no ']', `l` without its last character is appended to
# `lines` as a list of characters. Each entry of `lines` is later joined back
# into one string and stored in `trdat` at the same index.
# NOTE(review): the lines that initialise `l`, `lines` and `trdat`, advance
# `l`, choose the error branch and return are not visible in this view.
158 def parse_string(line
, fileptr
, name
):
159 if (line
.startswith(name
)):
160 sys
.stdout
.write('.'); sys
.stdout
.flush()
# NOTE(review): if `l` comes from fileptr.readline(), end of file yields ''
# rather than None, so this condition would stay true at EOF -- confirm.
163 while l
is not None and l
.find(']') < 0:
# l[:-1] drops the final character (presumably the newline -- TODO confirm).
165 lines
+=[list(l
[:-1])]
169 sys
.stderr
.write("string ended without ']'\n")
172 #seqlen=len(lines[0])
174 #trdat = chararray((seqlen,num),1,order='FORTRAN')
175 #for i in xrange(num):
176 # trdat[:,i]=lines[i]
# Turn every list of characters back into a single string.
178 for idx
,example
in enumerate(lines
):
179 trdat
[idx
] = ''.join(example
)
# Manual self-test, run only when the file is executed as a script.
# For each field of `m` it prints whether the field is still None, then prints
# the values of intron_len_transform and substitution_matrix.
# NOTE(review): the line that produces `m` (presumably a parse_file(f) call) is
# not visible in this view.
# NOTE(review): the print statements are Python 2 syntax.
184 if __name__
== '__main__':
185 #import hotshot, hotshot.stats
# Hard-coded, site-specific parameter file path.
188 f
='/fml/ag-raetsch/share/projects/palma/param_files/arabidopsis_ath1_ss=1_il=1.param.bz2'
# Acceptor fields.
191 print m
.acceptor_bins
is None
192 print m
.acceptor_limits
is None
193 print m
.acceptor_penalties
is None
195 print m
.acceptor_splice_b
is None
196 print m
.acceptor_splice_order
is None
197 print m
.acceptor_splice_window_left
is None
198 print m
.acceptor_splice_window_right
is None
199 print m
.acceptor_splice_alphas
is None
200 print m
.acceptor_splice_svs
is None
# Donor fields.
202 print m
.donor_bins
is None
203 print m
.donor_limits
is None
204 print m
.donor_penalties
is None
206 print m
.donor_splice_b
is None
207 print m
.donor_splice_order
is None
208 #print m.donor_splice_use_gc is None
209 print m
.donor_splice_window_left
is None
210 print m
.donor_splice_window_right
is None
211 print m
.donor_splice_alphas
is None
212 print m
.donor_splice_svs
is None
# Intron length fields.
213 print m
.intron_len_bins
is None
214 print m
.intron_len_limits
is None
215 print m
.intron_len_penalties
is None
216 print m
.intron_len_min
is None
217 print m
.intron_len_max
is None
218 print m
.intron_len_transform
is None
# Substitution matrix.
220 print m
.substitution_matrix
is None
# Values of two of the parsed fields.
222 print 'intron_len_transform'
223 print m
.intron_len_transform
224 print 'substitution_matrix'
225 print m
.substitution_matrix
# Disabled hotshot profiling run.
229 #prof = hotshot.Profile("model.prof")
230 #benchtime = prof.runcall(load)
232 #stats = hotshot.stats.load("model.prof")
234 #stats.sort_stats('time', 'calls')
235 #stats.print_stats(20)