+ fixed automatic Python path settings in Makefiles
[qpalma.git] / tests / test_qpalma.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 2 of the License, or
7 # (at your option) any later version.
8 #
9 # Written (W) 2008 Fabio De Bona
10 # Copyright (C) 2008 Max-Planck-Society
11
12
13 import cPickle
14 import random
15 import sys
16 import math
17 import numpy
18 from numpy import inf
19 import os.path
20 import shutil
21 import pdb
22 import array
23 import unittest
24
25 from qpalma.qpalma_main import QPalma,preprocessExample
26 from qpalma.utils import print_prediction
27
28 from qpalma.Run import Run
29
30 from qpalma.SettingsParser import parseSettings
31 from qpalma.DatasetUtils import processQuality
32 from qpalma.OutputFormat import alignment_reconstruct
33 from qpalma.sequence_utils import DataAccessWrapper,SeqSpliceInfo
34
35 from qpalma.Lookup import LookupTable
36
37 jp = os.path.join
38
39
40 def createTrainingSet():
41
42 dataset = {}
43
44 id = 1111
45 chromo = 1
46 strand = '+'
47 up_cut = 117
48 down_cut = 383
49
50 currentSeqInfo = (id,chromo,strand,up_cut,down_cut)
51
52 # tcaatacaaatcctatttcttgtggttttctttccttcacttagctatggatggtttatcttcattt
53 # tcaatacaaatcctatttctt ctatggatggtttatcttcattt
54
55 originalRead = 'tcaatacaaatcctatttcttctatggatggtttatcttcattt'
56 raw_qualities = 'h'*len(originalRead)
57 prb_offset = 64
58 quality_interval = (-5,40)
59 perform_checks = True
60 quality = processQuality(raw_qualities,prb_offset,quality_interval,perform_checks)
61 currentQualities = [quality]
62
63 exons = numpy.mat([217,238,261,284]).reshape((2,2))
64
65 dataset[id] = (currentSeqInfo,originalRead,currentQualities,exons)
66
67 id = 2222
68 up_cut = 1560
69 down_cut = 1901
70
71 currentSeqInfo = (id,chromo,strand,up_cut,down_cut)
72
73 # tcctttaagattttgtttttataatgtgttcttccatccacatctatctccatatgatatggaccatatcatacatcatcatttgtccaaatgcatgaatgaatttggaaataggtacgagaatgccaacaatgacaagaa
74 # tcctttaagattttgtttttataatgt gtacgagaatgccaacaatgacaagaa
75
76 originalRead = 'tcctttaagattttgtttttataatgtgtacgagaatgccaacaatgacaagaa'
77 raw_qualities = 'h'*len(originalRead)
78 prb_offset = 64
79 quality_interval = (-5,40)
80 perform_checks = True
81 quality = processQuality(raw_qualities,prb_offset,quality_interval,perform_checks)
82 currentQualities = [quality]
83
84 exons = numpy.mat([1660,1687,1774,1801]).reshape((2,2))
85
86 dataset[id] = (currentSeqInfo,originalRead,currentQualities,exons)
87
88 return dataset
89
90
91 def createPredictionSet():
92
93 dataset = {}
94
95 id = 1111
96 chromo = 1
97 strand = '+'
98 up_cut = 117
99 down_cut = 383
100
101 currentSeqInfo = (id,chromo,strand,up_cut,down_cut)
102
103 # tcaatacaaatcctatttcttgtggttttctttccttcacttagctatggatggtttatcttcattt
104 # tcaatacaaatcctatttctt ctatggatggtttatcttcattt
105
106 originalRead = 'tcaatacaaatcctatttcttctatggatggtttatcttcattt'
107 raw_qualities = 'h'*len(originalRead)
108 prb_offset = 64
109 quality_interval = (-5,40)
110 perform_checks = True
111 quality = processQuality(raw_qualities,prb_offset,quality_interval,perform_checks)
112 currentQualities = [quality]
113
114 exons = numpy.mat([217,238,261,284]).reshape((2,2))
115
116 dataset.setdefault(id, []).append((currentSeqInfo,originalRead,currentQualities))
117
118 id = 2222
119 up_cut = 1560
120 down_cut = 1901
121
122 currentSeqInfo = (id,chromo,strand,up_cut,down_cut)
123
124 # tcctttaagattttgtttttataatgtgttcttccatccacatctatctccatatgatatggaccatatcatacatcatcatttgtccaaatgcatgaatgaatttggaaataggtacgagaatgccaacaatgacaagaa
125 # tcctttaagattttgtttttataatgt gtacgagaatgccaacaatgacaagaa
126
127 originalRead = 'tcctttaagattttgtttttataatgtgtacgagaatgccaacaatgacaagaa'
128 raw_qualities = 'h'*len(originalRead)
129 prb_offset = 64
130 quality_interval = (-5,40)
131 perform_checks = True
132 quality = processQuality(raw_qualities,prb_offset,quality_interval,perform_checks)
133 currentQualities = [quality]
134
135 exons = numpy.mat([1660,1687,1774,1801]).reshape((2,2))
136
137 dataset.setdefault(id, []).append((currentSeqInfo,originalRead,currentQualities))
138
139
140 return dataset
141
142
143 class TestQPalmaTraining(unittest.TestCase):
144 """
145 """
146
147 def setUp(self):
148 dir = '/fml/ag-raetsch/home/fabio/tmp/sandbox/testcases'
149 if os.path.exists(dir):
150 shutil.rmtree(dir)
151 os.mkdir(dir)
152
153 self.settings = parseSettings('train_testcase.conf')
154
155 accessWrapper = DataAccessWrapper(self.settings)
156 self.seqInfo = SeqSpliceInfo(accessWrapper,self.settings['allowed_fragments'])
157
158 self.training_set = createTrainingSet()
159
160
161 def testInitialization(self):
162 pass
163
164
165 def _test_preprocessExample(self):
166 """
167 """
168
169 id = 2222
170 (currentSeqInfo,originalRead,currentQualities,exons) = self.training_set[id]
171 print originalRead
172
173 try:
174 preprocessExample(self.training_set,id,self.seqInfo,self.settings)
175 except:
176 print sys.exc_info()
177
178
179 def _test_performAlignment(self):
180 """
181 """
182 #performAlignment(dna,read,quality,mmatrix,donor,acceptor,ps,qualityPlifs,current_num_path,prediction_mode,settings)
183 pass
184
185
186 def test_train(self):
187 """
188 This function
189 """
190
191 set_name = 'test_train'
192
193 qp = QPalma(self.seqInfo,True)
194 qp.train(self.training_set,self.settings,set_name)
195
196
197
198 class TestQPalmaPrediction(unittest.TestCase):
199 """
200 This class...
201 """
202
203
204 def setUp(self):
205 """
206 """
207
208 dir = '/fml/ag-raetsch/home/fabio/tmp/sandbox/testcases'
209 if os.path.exists(dir):
210 shutil.rmtree(dir)
211 os.mkdir(dir)
212
213 self.settings = parseSettings('train_testcase.conf')
214
215 accessWrapper = DataAccessWrapper(self.settings)
216 self.seqInfo = SeqSpliceInfo(accessWrapper,self.settings['allowed_fragments'])
217
218 self.prediction_set = createPredictionSet()
219
220
221 def test_predictionFromTrainedParameters(self):
222 self.prediction_set
223
224 qp = QPalma(self.seqInfo,True)
225
226 self.settings['prediction_param_fn'] = 'param_0.pickle'
227
228 allPredictions = qp.predict(self.prediction_set,self.settings)
229
230 for current_prediction in allPredictions:
231 align_str = print_prediction(current_prediction)
232 print align_str
233
234 id = current_prediction['id']
235 seq = current_prediction['read']
236 dna = current_prediction['dna']
237 chromo = current_prediction['chr']
238 strand = current_prediction['strand']
239 start_pos = current_prediction['start_pos']
240 predExons = current_prediction['predExons']
241
242 numExons = int(math.ceil(len(predExons) / 2))
243
244 print alignment_reconstruct(current_prediction,numExons)
245 print id
246 print [e + start_pos for e in predExons]
247
248 ## chr1 + 20-120
249 #read = 'catctatgcaacagcattacagtgatcaccggcccaaaaaacctgtgtctggggttttgcctgatgatagcagtgatactgaaactggatcaatggtaag'
250 #currentQualities = [[40]*len(read)]
251
252 #id = 3
253 #chromo = 1
254 #strand = '+'
255
256 #genomicSeq_start = 3500
257 #genomicSeq_stop = 6500
258 #print 'Position: ',
259 #print genomicSeq_start,genomicSeq_stop
260
261 #currentSeqInfo = id,chromo,strand,genomicSeq_start,genomicSeq_stop
262
263 #example = (currentSeqInfo,read,currentQualities)
264 #self.prediction_set[id] = [example]
265
266 ## chr1 - 5000-5100
267 #read = 'ctgtgtatctggttgctcaatatgctcgccggaaaatgaagatcatggatgctgtgagttctccttattgttcattatcaaactgatatgagtttctgat'
268 #currentQualities = [[40]*len(read)]
269
270 #id = 4
271 #chromo = 1
272 #strand = '-'
273
274 #total_size = 30432563
275
276 #genomicSeq_start = total_size - 6500
277 #genomicSeq_stop = total_size - 3500
278 #print 'Position: ',
279 #print genomicSeq_start,genomicSeq_stop
280
281 #currentSeqInfo = id,chromo,strand,genomicSeq_start,genomicSeq_stop
282
283 #example = (currentSeqInfo,read,currentQualities)
284 #self.prediction_set[id] = [example]
285
286
287 def _testAlignments(self):
288
289 settings = parseSettings('testcase.conf')
290
291 print self.prediction_set
292 for example_key in self.prediction_set.keys():
293 print 'Current example %d' % example_key
294
295 for example in self.prediction_set[example_key]:
296 print example
297 print 'size'
298 print len(example)
299
300 accessWrapper = DataAccessWrapper(settings)
301 seqInfo = SeqSpliceInfo(accessWrapper,settings['allowed_fragments'])
302
303 qp = QPalma(seqInfo,True)
304 allPredictions = qp.predict(self.prediction_set,settings)
305
306 for current_prediction in allPredictions:
307 align_str = print_prediction(current_prediction)
308 print align_str
309
310 id = current_prediction['id']
311 seq = current_prediction['read']
312 dna = current_prediction['dna']
313 chromo = current_prediction['chr']
314 strand = current_prediction['strand']
315 start_pos = current_prediction['start_pos']
316 predExons = current_prediction['predExons']
317
318 numExons = int(math.ceil(len(predExons) / 2))
319
320 print alignment_reconstruct(current_prediction,numExons)
321 print id,start_pos,predExons
322
323 print 'Problem counter is %d' % qp.problem_ctr
324
325
326 if __name__ == '__main__':
327 train_suite = unittest.TestLoader().loadTestsFromTestCase(TestQPalmaTraining)
328 predict_suite = unittest.TestLoader().loadTestsFromTestCase(TestQPalmaPrediction)
329 #all_suites = unittest.TestSuite([train_suite, predict_suite])
330 all_suites = unittest.TestSuite([train_suite])
331 unittest.TextTestRunner(verbosity=2).run(all_suites)