0e41bb2add8f4a568de76f3d727a4ea9918e941d
[qpalma.git] / tests / test_qpalma_prediction.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 2 of the License, or
7 # (at your option) any later version.
8 #
9 # Written (W) 2008 Fabio De Bona
10 # Copyright (C) 2008 Max-Planck-Society
11
12 import cPickle
13 import math
14 import numpy
15 import os.path
16 import pdb
17 import unittest
18
19 from qpalma.qpalma_main import QPalma
20 from qpalma.utils import print_prediction
21
22 from qpalma.Run import Run
23
24 from qpalma.OutputFormat import alignment_reconstruct
25
26 from qpalma.sequence_utils import DataAccessWrapper,SeqSpliceInfo
27
28 jp = os.path.join
29
30
31 class TestQPalmaPrediction(unittest.TestCase):
32 """
33 This class...
34 """
35
36 def _setUp(self):
37 data_fn = '/fml/ag-raetsch/home/fabio/tmp/vmatch_evaluation/spliced_1/dataset/prediction_set.pickle'
38
39 self.prediction_set = cPickle.load(open(data_fn))
40
41
42
43 def _setUp(self):
44 print
45 self.prediction_set = {}
46
47 # xxxxxxxxxxx_______________________________________________________________________xxxxxxxxxxxxxxxxxxxxxxxxxxx
48 # 'tattttggaaggtatttcatctctccgacattgctctcaacactgtcccctccaatgcctagtccttttatttttttcttagttccaattcccttaaatacatctcacagtcttcttcttcttctcgattgcagtagc'
49
50 read = 'tattttggaagttccaattcccttaatacatctcacag'
51 currentQualities = [[30]*len(read)]
52
53 id = 1
54 chromo = 3
55 strand = '-'
56
57 tsize = 23470805
58
59 genomicSeq_start = tsize - (2038+10)
60 genomicSeq_stop = tsize - 1900 + 10
61 print 'Position: ',
62 print genomicSeq_start,genomicSeq_stop
63
64 currentSeqInfo = id,chromo,strand,genomicSeq_start,genomicSeq_stop
65
66 example = (currentSeqInfo,read,currentQualities)
67 self.prediction_set[id] = [example]
68
69 # xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx xxxxxxxx
70 # 'gctactgcaatcgagaagaagaagaagactgtgagatgtatttaagggaattggaactaagaaaaaaataaaaggactaggcattggaggggacagtgttgagagcaatgtcggagagatgaaataccttccaaaata'
71
72 read = 'gctactgcaatcgagaagaagaagaagactatgaaata'
73 currentQualities = [[40]*30+[22]*8]
74
75 id = 2
76 chromo = 3
77 strand = '+'
78
79 tsize = 23470805
80
81 genomicSeq_start = tsize - (2038+10)
82 genomicSeq_stop = tsize - 1900 + 10
83 print 'Position: ',
84 print genomicSeq_start,genomicSeq_stop
85
86 currentSeqInfo = id,chromo,strand,genomicSeq_start,genomicSeq_stop
87
88 example = (currentSeqInfo,read,currentQualities)
89 self.prediction_set[id] = [example]
90
91
92 def setUp(self):
93 self.prediction_set = {}
94
95 # chr1 + 20-120
96 read = 'catctatgcaacagcattacagtgatcaccggcccaaaaaacctgtgtctggggttttgcctgatgatagcagtgatactgaaactggatcaatggtaag'
97 currentQualities = [[40]*len(read)]
98
99 id = 3
100 chromo = 1
101 strand = '+'
102
103 genomicSeq_start = 3500
104 genomicSeq_stop = 6500
105 print 'Position: ',
106 print genomicSeq_start,genomicSeq_stop
107
108 currentSeqInfo = id,chromo,strand,genomicSeq_start,genomicSeq_stop
109
110 example = (currentSeqInfo,read,currentQualities)
111 self.prediction_set[id] = [example]
112
113 # chr1 - 5000-5100
114 read = 'ctgtgtatctggttgctcaatatgctcgccggaaaatgaagatcatggatgctgtgagttctccttattgttcattatcaaactgatatgagtttctgat'
115 currentQualities = [[40]*len(read)]
116
117 id = 4
118 chromo = 1
119 strand = '-'
120
121 total_size = 30432563
122
123 genomicSeq_start = total_size - 6500
124 genomicSeq_stop = total_size - 3500
125 print 'Position: ',
126 print genomicSeq_start,genomicSeq_stop
127
128 currentSeqInfo = id,chromo,strand,genomicSeq_start,genomicSeq_stop
129
130 example = (currentSeqInfo,read,currentQualities)
131 self.prediction_set[id] = [example]
132
133
134 def testAlignments(self):
135 run_dir = '/fml/ag-raetsch/home/fabio/tmp/newest_run/alignment/saved_run'
136
137 run = cPickle.load(open(jp(run_dir,'run_obj.pickle')))
138 run['name'] = 'test_run'
139 run['result_dir'] = '/fml/ag-raetsch/home/fabio/tmp/sandbox/testcases'
140
141 param_fn = jp(run_dir,'param_526.pickle')
142 param = cPickle.load(open(param_fn))
143
144 print self.prediction_set
145 for example_key in self.prediction_set.keys():
146 print 'Current example %d' % example_key
147
148 for example in self.prediction_set[example_key]:
149 print example
150 print 'size'
151 print len(example)
152
153 # fetch the data needed
154 settings = {}
155
156 settings['genome_dir'] = '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome'
157 settings['acceptor_scores_loc'] = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/acc'
158 settings['donor_scores_loc'] = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/don'
159
160 settings['genome_file_fmt'] = 'chr%d.dna.flat'
161 settings['splice_score_file_fmt']= 'contig_%d%s'
162
163 allowed_fragments = [1]
164 settings['allowed_fragments'] = allowed_fragments
165
166 accessWrapper = DataAccessWrapper(settings)
167 seqInfo = SeqSpliceInfo(accessWrapper,allowed_fragments)
168
169 qp = QPalma(run,seqInfo,True)
170 allPredictions = qp.predict(self.prediction_set,param)
171
172 for current_prediction in allPredictions:
173 align_str = print_prediction(current_prediction)
174 print align_str
175
176 id = current_prediction['id']
177 seq = current_prediction['read']
178 dna = current_prediction['dna']
179 chromo = current_prediction['chr']
180 strand = current_prediction['strand']
181 start_pos = current_prediction['start_pos']
182 predExons = current_prediction['predExons']
183
184 numExons = int(math.ceil(len(predExons) / 2))
185
186 print alignment_reconstruct(current_prediction,numExons)
187 print id,start_pos,predExons
188
189 print 'Problem counter is %d' % qp.problem_ctr
190
191
192 if __name__ == '__main__':
193 suite = unittest.TestLoader().loadTestsFromTestCase(TestQPalmaPrediction)
194 unittest.TextTestRunner(verbosity=2).run(suite)