2 # -*- coding: utf-8 -*-
import os
import unittest

from qpalma.sequence_utils import SeqSpliceInfo, DataAccessWrapper, reverse_complement
from qpalma.Lookup import LookupTable
11 class TestSequenceUtils(unittest
.TestCase
):
15 self
.strands
= ['+','']
17 g_dir
= '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome/'
18 acc_dir
= '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/acc'
19 don_dir
= '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/don'
21 g_fmt
= 'chr%d.dna.flat'
26 accessWrapper
= DataAccessWrapper(g_dir
,acc_dir
,don_dir
,g_fmt
,s_fmt
)
27 self
.seqInfo
= SeqSpliceInfo(accessWrapper
,range(1,num_chromo
))
29 print self
.seqInfo
.chromo_sizes
31 #self.lt1 = LookupTable(g_dir,acc_dir,don_dir,g_fmt,s_fmt,range(1,2))
34 def testThalianaDataExamples(self
):
35 seq
= 'TGAAAACAGGAACGGATTGGAGAAAGGCGTCTCGTCAT'.lower()
40 dna
= self
.seqInfo
.get_seq_and_scores(chromo
,strand
,pos
,pos
+38,True)
41 self
.assertEqual(seq
,dna
)
43 seq
= 'AGGCAATGAAACTGATGCATTGGACTTGACGGGTGTTG'.lower()
47 dna
= self
.seqInfo
.get_seq_and_scores(chromo
,strand
,pos
,pos
+38,True)
48 self
.assertEqual(seq
,dna
)
50 seq
= 'TCTTGGTGGAGGAGCTAACACCGTAGCTGACGGTTACA'.lower()
54 dna
= self
.seqInfo
.get_seq_and_scores(chromo
,strand
,pos
,pos
+38,True)
55 self
.assertEqual(seq
,dna
)
57 seq
= 'TTGGAAGACAGAGTCAACCATACCCTTGCCTCTGGTGA'.lower()
61 dna
= self
.seqInfo
.get_seq_and_scores(chromo
,strand
,pos
,pos
+38,True)
62 self
.assertEqual(seq
,dna
)
64 seq
= 'CTGGCCAAAAGCTCAGGGAAGACGCAGCCTAGGGCTCC'.lower()
65 seq
= reverse_complement(seq
)
69 dna
= self
.seqInfo
.get_seq_and_scores(chromo
,strand
,pos
,pos
+38,True)
70 self
.assertEqual(seq
,dna
)
72 seq
= 'TTTTTCCCTTCTAGAAGACCGTAAAGGTAAACTTCTAA'.lower()
73 seq
= reverse_complement(seq
)
77 dna
= self
.seqInfo
.get_seq_and_scores(chromo
,strand
,pos
,pos
+38,True)
78 self
.assertEqual(seq
,dna
)
80 seq
= 'CACGGTGCAGATGAAGAACTGAGATCCGTTCGTGTTTG'.lower()
81 seq
= reverse_complement(seq
)
85 dna
= self
.seqInfo
.get_seq_and_scores(chromo
,strand
,pos
,pos
+38,True)
86 self
.assertEqual(seq
,dna
)
89 seq
= 'CACGGTGCAGATGAAGAACTGAGATCCGTTCGTGTTTG'.lower()
90 seq
= reverse_complement(seq
)
94 dna
,acc
,don
= self
.seqInfo
.get_seq_and_scores(chromo
,strand
,pos
,pos
+38+2*window
,False)
95 self
.assertEqual(seq
,dna
[window
:-window
])
98 #print "".join(map(lambda x: ['_','x'][x!=-numpy.inf],acc))
99 #print "".join(map(lambda x: ['_','x'][x!=-numpy.inf],don))
102 def _testThalianaDataGeneric(self
):
104 dna
,acc
,don
= self
.seqInfo
.get_seq_and_scores(1,'+',1000,1369)
105 dna_
,acc_
,don_
= self
.lt1
.get_seq_and_scores(1,'+',1000,1369,'')
107 self
.assertEqual(dna
,dna_
)
108 self
.assertEqual(acc
,acc_
)
109 self
.assertEqual(don
,don_
)
111 dna
,acc
,don
= self
.seqInfo
.get_seq_and_scores(1,'-',1000,1369)
112 dna_
,acc_
,don_
= self
.lt1
.get_seq_and_scores(1,'-',1000,1369,'')
114 self
.assertEqual(dna
,dna_
)
115 self
.assertEqual(acc
,acc_
)
116 self
.assertEqual(don
,don_
)
118 #dna,acc,don = seqInfo.get_seq_and_scores(2,'+',1000,1369)
119 #dna,acc,don = seqInfo.get_seq_and_scores(3,'+',1000,1369)
120 #dna,acc,don = seqInfo.get_seq_and_scores(4,'+',1000,1369)
121 #dna,acc,don = seqInfo.get_seq_and_scores(5,'+',1000,1369)
123 #dna,acc,don = seqInfo.get_seq_and_scores(1,'-',1000,1369)
124 #dna_,acc_,don_ = lt1.get_seq_and_scores(1,'-',1000,1369,'')
126 #self.assertEqual(dna,dna_)
127 #self.assertEqual(acc,acc_)
128 #self.assertEqual(don,don_)
131 #dna,acc,don = seqInfo.get_seq_and_scores(2,'-',1000,1369)
132 #dna,acc,don = seqInfo.get_seq_and_scores(3,'-',1000,1369)
133 #dna,acc,don = seqInfo.get_seq_and_scores(4,'-',1000,1369)
134 #dna,acc,don = seqInfo.get_seq_and_scores(5,'-',1000,1369)
137 def _testLyrataData(self
):
138 g_dir
= '/fml/ag-raetsch/home/fabio/tmp/Lyrata/contigs'
139 acc_dir
= '/fml/ag-raetsch/home/fabio/tmp/Lyrata/splice_scores/acc'
140 don_dir
= '/fml/ag-raetsch/home/fabio/tmp/Lyrata/splice_scores/don'
142 g_fmt
= 'contig%d.dna.flat'
143 s_fmt
= 'contig_%d%s'
147 accessWrapper
= DataAccessWrapper(g_dir
,acc_dir
,don_dir
,g_fmt
,s_fmt
)
148 seqInfo
= SeqSpliceInfo(accessWrapper
,range(1,num_chromo
))
150 dna
,acc
,don
= seqInfo
.get_seq_and_scores(1,'+',1,1369)
151 dna
,acc
,don
= seqInfo
.get_seq_and_scores(2,'+',1,1369)
152 dna
,acc
,don
= seqInfo
.get_seq_and_scores(3,'+',1,1369)
153 dna
,acc
,don
= seqInfo
.get_seq_and_scores(4,'+',1,1369)
154 dna
,acc
,don
= seqInfo
.get_seq_and_scores(5,'+',1,1369)
155 dna
,acc
,don
= seqInfo
.get_seq_and_scores(1,'-',1,1369)
156 dna
,acc
,don
= seqInfo
.get_seq_and_scores(2,'-',1,1369)
157 dna
,acc
,don
= seqInfo
.get_seq_and_scores(3,'-',1,1369)
158 dna
,acc
,don
= seqInfo
.get_seq_and_scores(4,'-',1,1369)
159 dna
,acc
,don
= seqInfo
.get_seq_and_scores(5,'-',1,1369)
160 dna
,acc
,don
= seqInfo
.get_seq_and_scores(45,'-',1,1369)
161 dna
,acc
,don
= seqInfo
.get_seq_and_scores(45,'+',1,1369)
165 #for chromo in range(1,6):
166 # for strand in ['+','-']:
167 # for test_idx in range(num_tests):
169 # size = seqInfo.chromo_sizes[chromo+7]
171 # size = seqInfo.chromo_sizes[chromo]
172 # begin = random.randint(0,size)
173 # end = random.randint(b,size)
174 # dna,acc,don = seqInfo.get_seq_and_scores(chromo,strand,begin,end)
class TestLookupTable(unittest.TestCase):
    """Cross-checks of ``LookupTable`` against ``SeqSpliceInfo``.

    All data locations are absolute paths configured inside the methods, so
    the tests presumably only pass on a machine where those paths exist.
    """

    def testTableThalianaData(self):
        """Query both back-ends for the same A. thaliana interval.

        NOTE(review): ``num_chromo``, ``chromo``, ``strand`` and ``pos`` are
        not assigned in the visible code; they have to be defined before this
        test can run.
        """
        g_dir = '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome/'
        acc_dir = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/acc'
        don_dir = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/don'

        g_fmt = 'chr%d.dna.flat'
        s_fmt = 'contig_%d%s'

        lt1 = LookupTable(g_dir, acc_dir, don_dir, g_fmt, s_fmt, range(1, num_chromo))

        accessWrapper = DataAccessWrapper(g_dir, acc_dir, don_dir, g_fmt, s_fmt)
        seqInfo = SeqSpliceInfo(accessWrapper, range(1, num_chromo))

        expected = reverse_complement('CTGGCCAAAAGCTCAGGGAAGACGCAGCCTAGGGCTCC'.lower())

        # Both back-ends must return the expected 38-base fragment.
        fetched = seqInfo.get_seq_and_scores(chromo, strand, pos, pos + 38, True)
        self.assertEqual(expected, fetched)

        fetched = lt1.get_seq_and_scores(chromo, strand, pos, pos + 38, True)
        self.assertEqual(expected, fetched)

        # NOTE(review): these two results are fetched but never compared in
        # the visible code; presumably an equality check belongs here.
        dna, acc, don = seqInfo.get_seq_and_scores(1, '+', 1, 1369)
        dna_, acc_, don_ = lt1.get_seq_and_scores(1, '+', 1, 1369)

    def _testTableLyrataData(self):
        """Build a Lyrata ``LookupTable`` and check the data files exist.

        Disabled through the leading underscore.
        """
        g_dir = '/fml/ag-raetsch/home/fabio/tmp/Lyrata/contigs'
        acc_dir = '/fml/ag-raetsch/home/fabio/tmp/Lyrata/splice_scores/acc'
        don_dir = '/fml/ag-raetsch/home/fabio/tmp/Lyrata/splice_scores/don'

        g_fmt = 'contig%d.dna.flat'
        s_fmt = 'contig_%d%s'

        # The table is only constructed; nothing below queries it.
        lt1 = LookupTable(g_dir, acc_dir, don_dir, g_fmt, s_fmt, range(0, 1099))

        # NOTE(review): the settings are declared a second time from here on,
        # with a different ``s_fmt``. Presumably this part was a separate
        # routine once -- confirm before restructuring.
        g_dir = '/fml/ag-raetsch/home/fabio/tmp/Lyrata/contigs'
        acc_dir = '/fml/ag-raetsch/home/fabio/tmp/Lyrata/splice_scores/acc'
        don_dir = '/fml/ag-raetsch/home/fabio/tmp/Lyrata/splice_scores/don'

        g_fmt = 'contig%d.dna.flat'
        s_fmt = 'contig_%d%s.Conf_cum'

        test = DataAccessWrapper(g_dir, acc_dir, don_dir, g_fmt, s_fmt)

        # assertTrue is used in place of a bare ``assert``: it survives
        # ``python -O`` and the failure message names the missing file.
        for idx in range(1, 100):
            plus_fn = test.get_genomic_fragment_fn(idx, '+')
            minus_fn = test.get_genomic_fragment_fn(idx, '-')

            self.assertTrue(os.path.exists(plus_fn), plus_fn)
            self.assertTrue(os.path.exists(minus_fn), minus_fn)

            acc_fn, don_fn = test.get_splice_site_scores_fn(idx, '+')

            self.assertTrue(os.path.exists(acc_fn), acc_fn)
            self.assertTrue(os.path.exists(don_fn), don_fn)

            acc_fn, don_fn = test.get_splice_site_scores_fn(idx, '-')

            self.assertTrue(os.path.exists(acc_fn), acc_fn)
            self.assertTrue(os.path.exists(don_fn), don_fn)
if __name__ == '__main__':
    # Only the TestLookupTable case is run; to run the sequence-utils case
    # instead, load TestSequenceUtils here.
    loader = unittest.TestLoader()
    suite = loader.loadTestsFromTestCase(TestLookupTable)

    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(suite)