ed11832b784e12e96bcef802b4bf44524e954374
[qpalma.git] / tests / test_qpalma.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 2 of the License, or
7 # (at your option) any later version.
8 #
9 # Written (W) 2008 Fabio De Bona
10 # Copyright (C) 2008 Max-Planck-Society
11
12
13 import cPickle
14 import random
15 import sys
16 import math
17 import numpy
18 from numpy import inf
19 import os.path
20 import pdb
21 import array
22 import unittest
23
24 from qpalma.qpalma_main import QPalma
25 from qpalma.utils import print_prediction
26
27 from qpalma.Run import Run
28
29 from qpalma.SettingsParser import parseSettings
30 from qpalma.OutputFormat import alignment_reconstruct
31 from qpalma.sequence_utils import DataAccessWrapper,SeqSpliceInfo,reverse_complement
32
33 from qpalma.Lookup import LookupTable
34
35 jp = os.path.join
36
37
38 class TestQPalmaTraining(unittest.TestCase):
39
40 def setUp(self):
41
42 self.settings = parseSettings('testcase.conf')
43
44
45 def testInitialization(self):
46 pass
47
48
49 def test_preprocessExample(self):
50
51 currentSeqInfo = ()
52 originalRead =
53 currentQualities =
54 currentExons =
55
56 training_set[1] = (currentSeqInfo,originalRead,currentQualities,currentExons)
57
58 dna,read,acc_supp,don_supp,exons,originalRead,currentQualities =\
59 preprocessExample(training_set,1,seqInfo,settings)
60
61
62 def test_performAlignment(self):
63 """
64 """
65 #performAlignment(dna,read,quality,mmatrix,donor,acceptor,ps,qualityPlifs,current_num_path,prediction_mode,settings)
66 pass
67
68
69
70 def test_train(self):
71 """
72 This function
73 """
74
75 set_name = 'test_train'
76
77 qp = QPalma(seqInfo,True)
78 qp.train(self.training_set,self.settings,set_name)
79
80
81
82 class TestQPalmaPrediction(unittest.TestCase):
83 """
84 This class...
85 """
86
87
88 def _setUp(self):
89 self.prediction_set = {}
90
91 # chr1 + 20-120
92 read = 'catctatgcaacagcattacagtgatcaccggcccaaaaaacctgtgtctggggttttgcctgatgatagcagtgatactgaaactggatcaatggtaag'
93 currentQualities = [[40]*len(read)]
94
95 id = 3
96 chromo = 1
97 strand = '+'
98
99 genomicSeq_start = 3500
100 genomicSeq_stop = 6500
101 print 'Position: ',
102 print genomicSeq_start,genomicSeq_stop
103
104 currentSeqInfo = id,chromo,strand,genomicSeq_start,genomicSeq_stop
105
106 example = (currentSeqInfo,read,currentQualities)
107 self.prediction_set[id] = [example]
108
109 # chr1 - 5000-5100
110 read = 'ctgtgtatctggttgctcaatatgctcgccggaaaatgaagatcatggatgctgtgagttctccttattgttcattatcaaactgatatgagtttctgat'
111 currentQualities = [[40]*len(read)]
112
113 id = 4
114 chromo = 1
115 strand = '-'
116
117 total_size = 30432563
118
119 genomicSeq_start = total_size - 6500
120 genomicSeq_stop = total_size - 3500
121 print 'Position: ',
122 print genomicSeq_start,genomicSeq_stop
123
124 currentSeqInfo = id,chromo,strand,genomicSeq_start,genomicSeq_stop
125
126 example = (currentSeqInfo,read,currentQualities)
127 self.prediction_set[id] = [example]
128
129
130 def testAlignments(self):
131
132 settings = parseSettings('testcase.conf')
133
134 print self.prediction_set
135 for example_key in self.prediction_set.keys():
136 print 'Current example %d' % example_key
137
138 for example in self.prediction_set[example_key]:
139 print example
140 print 'size'
141 print len(example)
142
143 accessWrapper = DataAccessWrapper(settings)
144 seqInfo = SeqSpliceInfo(accessWrapper,settings['allowed_fragments'])
145
146 qp = QPalma(seqInfo,True)
147 allPredictions = qp.predict(self.prediction_set,settings)
148
149 for current_prediction in allPredictions:
150 align_str = print_prediction(current_prediction)
151 print align_str
152
153 id = current_prediction['id']
154 seq = current_prediction['read']
155 dna = current_prediction['dna']
156 chromo = current_prediction['chr']
157 strand = current_prediction['strand']
158 start_pos = current_prediction['start_pos']
159 predExons = current_prediction['predExons']
160
161 numExons = int(math.ceil(len(predExons) / 2))
162
163 print alignment_reconstruct(current_prediction,numExons)
164 print id,start_pos,predExons
165
166 print 'Problem counter is %d' % qp.problem_ctr
167
168
169 def _testAlignments(self):
170 run_dir = '/fml/ag-raetsch/home/fabio/tmp/newest_run/alignment/saved_run'
171
172 run = cPickle.load(open(jp(run_dir,'run_obj.pickle')))
173 run['name'] = 'test_run'
174 run['result_dir'] = '/fml/ag-raetsch/home/fabio/tmp/sandbox/testcases'
175
176 param_fn = jp(run_dir,'param_526.pickle')
177 param = cPickle.load(open(param_fn))
178
179 print self.prediction_set
180 for example_key in self.prediction_set.keys():
181 print 'Current example %d' % example_key
182
183 for example in self.prediction_set[example_key]:
184 print example
185 print 'size'
186 print len(example)
187
188 # fetch the data needed
189 settings = {}
190
191 settings['genome_dir'] = '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome'
192 settings['acceptor_scores_loc'] = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/acc'
193 settings['donor_scores_loc'] = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/don'
194
195 settings['genome_file_fmt'] = 'chr%d.dna.flat'
196 settings['splice_score_file_fmt']= 'contig_%d%s'
197
198 allowed_fragments = [1]
199 settings['allowed_fragments'] = allowed_fragments
200
201 accessWrapper = DataAccessWrapper(settings)
202 seqInfo = SeqSpliceInfo(accessWrapper,allowed_fragments)
203
204 qp = QPalma(run,seqInfo,True)
205 allPredictions = qp.predict(self.prediction_set,param)
206
207 for current_prediction in allPredictions:
208 align_str = print_prediction(current_prediction)
209 print align_str
210
211 id = current_prediction['id']
212 seq = current_prediction['read']
213 dna = current_prediction['dna']
214 chromo = current_prediction['chr']
215 strand = current_prediction['strand']
216 start_pos = current_prediction['start_pos']
217 predExons = current_prediction['predExons']
218
219 numExons = int(math.ceil(len(predExons) / 2))
220
221 print alignment_reconstruct(current_prediction,numExons)
222 print id,start_pos,predExons
223
224 print 'Problem counter is %d' % qp.problem_ctr
225
226
227 if __name__ == '__main__':
228 suite = unittest.TestLoader().loadTestsFromTestCase(TestQPalmaTraining)
229 suite = unittest.TestLoader().loadTestsFromTestCase(TestQPalmaPrediction)
230 unittest.TextTestRunner(verbosity=2).run(suite)