+ cleaned up directories
[qpalma.git] / tests / test_qpalma_prediction.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 2 of the License, or
7 # (at your option) any later version.
8 #
9 # Written (W) 2008 Fabio De Bona
10 # Copyright (C) 2008 Max-Planck-Society
11
12 import cPickle
13 import math
14 import numpy
15 import os.path
16 import pdb
17 import unittest
18
19 from qpalma_main import QPalma
20 from qpalma.utils import print_prediction
21
22 from createAlignmentFileFromPrediction import alignment_reconstruct
23
24 from qpalma.sequence_utils import DataAccessWrapper,SeqSpliceInfo
25
26 jp = os.path.join
27
28
29 class TestQPalmaPrediction(unittest.TestCase):
30 """
31 This class...
32 """
33
34 def _setUp(self):
35 data_fn = '/fml/ag-raetsch/home/fabio/tmp/vmatch_evaluation/spliced_1/dataset/prediction_set.pickle'
36
37 self.prediction_set = cPickle.load(open(data_fn))
38
39
40
41 def _setUp(self):
42 print
43 self.prediction_set = {}
44
45 # xxxxxxxxxxx_______________________________________________________________________xxxxxxxxxxxxxxxxxxxxxxxxxxx
46 # 'tattttggaaggtatttcatctctccgacattgctctcaacactgtcccctccaatgcctagtccttttatttttttcttagttccaattcccttaaatacatctcacagtcttcttcttcttctcgattgcagtagc'
47
48 read = 'tattttggaagttccaattcccttaatacatctcacag'
49 currentQualities = [[30]*len(read)]
50
51 id = 1
52 chromo = 3
53 strand = '-'
54
55 tsize = 23470805
56
57 genomicSeq_start = tsize - (2038+10)
58 genomicSeq_stop = tsize - 1900 + 10
59 print 'Position: ',
60 print genomicSeq_start,genomicSeq_stop
61
62 currentSeqInfo = id,chromo,strand,genomicSeq_start,genomicSeq_stop
63
64 example = (currentSeqInfo,read,currentQualities)
65 self.prediction_set[id] = [example]
66
67 # xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx xxxxxxxx
68 # 'gctactgcaatcgagaagaagaagaagactgtgagatgtatttaagggaattggaactaagaaaaaaataaaaggactaggcattggaggggacagtgttgagagcaatgtcggagagatgaaataccttccaaaata'
69
70 read = 'gctactgcaatcgagaagaagaagaagactatgaaata'
71 currentQualities = [[40]*30+[22]*8]
72
73 id = 2
74 chromo = 3
75 strand = '+'
76
77 tsize = 23470805
78
79 genomicSeq_start = tsize - (2038+10)
80 genomicSeq_stop = tsize - 1900 + 10
81 print 'Position: ',
82 print genomicSeq_start,genomicSeq_stop
83
84 currentSeqInfo = id,chromo,strand,genomicSeq_start,genomicSeq_stop
85
86 example = (currentSeqInfo,read,currentQualities)
87 self.prediction_set[id] = [example]
88
89
90 def setUp(self):
91 self.prediction_set = {}
92
93 # chr1 + 20-120
94 read = 'catctatgcaacagcattacagtgatcaccggcccaaaaaacctgtgtctggggttttgcctgatgatagcagtgatactgaaactggatcaatggtaag'
95 currentQualities = [[40]*len(read)]
96
97 id = 3
98 chromo = 1
99 strand = '+'
100
101 genomicSeq_start = 3500
102 genomicSeq_stop = 6500
103 print 'Position: ',
104 print genomicSeq_start,genomicSeq_stop
105
106 currentSeqInfo = id,chromo,strand,genomicSeq_start,genomicSeq_stop
107
108 example = (currentSeqInfo,read,currentQualities)
109 self.prediction_set[id] = [example]
110
111 # chr1 - 5000-5100
112 read = 'ctgtgtatctggttgctcaatatgctcgccggaaaatgaagatcatggatgctgtgagttctccttattgttcattatcaaactgatatgagtttctgat'
113 currentQualities = [[40]*len(read)]
114
115 id = 4
116 chromo = 1
117 strand = '-'
118
119 total_size = 30432563
120
121 genomicSeq_start = total_size - 6500
122 genomicSeq_stop = total_size - 3500
123 print 'Position: ',
124 print genomicSeq_start,genomicSeq_stop
125
126 currentSeqInfo = id,chromo,strand,genomicSeq_start,genomicSeq_stop
127
128 example = (currentSeqInfo,read,currentQualities)
129 self.prediction_set[id] = [example]
130
131
132 def testAlignments(self):
133 run_dir = '/fml/ag-raetsch/home/fabio/tmp/newest_run/alignment/saved_run'
134
135 run = cPickle.load(open(jp(run_dir,'run_obj.pickle')))
136 run['name'] = 'test_run'
137 run['result_dir'] = '/fml/ag-raetsch/home/fabio/tmp/sandbox/testcases'
138
139 param_fn = jp(run_dir,'param_526.pickle')
140 param = cPickle.load(open(param_fn))
141
142 print self.prediction_set
143 for example_key in self.prediction_set.keys():
144 print 'Current example %d' % example_key
145
146 for example in self.prediction_set[example_key]:
147 print example
148 print 'size'
149 print len(example)
150
151 # fetch the data needed
152 settings = {}
153
154 settings['genome_dir'] = '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome'
155 settings['acceptor_scores_loc'] = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/acc'
156 settings['donor_scores_loc'] = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/don'
157
158 settings['genome_file_fmt'] = 'chr%d.dna.flat'
159 settings['splice_score_file_fmt']= 'contig_%d%s'
160
161 allowed_fragments = [1]
162 settings['allowed_fragments'] = allowed_fragments
163
164 accessWrapper = DataAccessWrapper(settings)
165 seqInfo = SeqSpliceInfo(accessWrapper,allowed_fragments)
166
167 qp = QPalma(run,seqInfo,True)
168 allPredictions = qp.predict(self.prediction_set,param)
169
170 for current_prediction in allPredictions:
171 align_str = print_prediction(current_prediction)
172 print align_str
173
174 id = current_prediction['id']
175 seq = current_prediction['read']
176 dna = current_prediction['dna']
177 chromo = current_prediction['chr']
178 strand = current_prediction['strand']
179 start_pos = current_prediction['start_pos']
180 predExons = current_prediction['predExons']
181
182 numExons = int(math.ceil(len(predExons) / 2))
183
184 print alignment_reconstruct(current_prediction,numExons)
185 print id,start_pos,predExons
186
187 print 'Problem counter is %d' % qp.problem_ctr
188
189
190 if __name__ == '__main__':
191 suite = unittest.TestLoader().loadTestsFromTestCase(TestQPalmaPrediction)
192 unittest.TextTestRunner(verbosity=2).run(suite)