# -*- coding: utf-8 -*-
import pdb
+import unittest
import numpy
-import qpalma.sequence_utils
- #flat_files = '/fml/ag-raetsch/home/fabio/svn/projects/QPalma/tests/test_data'
+from qpalma.sequence_utils import SeqSpliceInfo,DataAccessWrapper,reverse_complement
+from qpalma.Lookup import LookupTable
+
+class TestSequenceUtils(unittest.TestCase):
+ # Smoke tests for SeqSpliceInfo.get_seq_and_scores on two data sets
+ # (A. thaliana and Lyrata). All inputs are absolute paths under
+ # /fml/ag-raetsch, so the tests can only run where those files exist.
+ # Neither test asserts on the returned values: each call overwrites
+ # dna/acc/don, so a test passes as long as no call raises.
+
+
+ def setUp(self):
+ # NOTE(review): the second entry is an empty string, while the tests
+ # below pass '-' for the reverse strand -- possibly a typo for '-'.
+ # self.strands is not read anywhere in this class.
+ self.strands = ['+','']
+
+
+ def testThalianaData(self):
+ # Query the interval 0..1369 on chromosomes 1-5, both strands.
+ g_dir = '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome/'
+ acc_dir = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/acc'
+ don_dir = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/don'
+
+ g_fmt = 'chr%d.dna.flat'
+ s_fmt = 'contig_%d%s'
+
+ # range(1,num_chromo) excludes num_chromo itself, i.e. ids 1..5.
+ num_chromo = 6
+
+ accessWrapper = DataAccessWrapper(g_dir,acc_dir,don_dir,g_fmt,s_fmt)
+ seqInfo = SeqSpliceInfo(accessWrapper,range(1,num_chromo))
+
+ #for chromo in range(1,num_chromo):
+ # dna,acc,don = seqInfo.get_seq_and_scores(chromo,'+',0,1369)
+ # dna_,acc_,don_ = seqInfo.get_seq_and_scores(chromo,'-',0,1369)
+
+ # self.assertEqual(len(dna),len(dna_))
+ # self.assertEqual(dna,dna_)
+
+ dna,acc,don = seqInfo.get_seq_and_scores(1,'+',0,1369)
+ dna,acc,don = seqInfo.get_seq_and_scores(2,'+',0,1369)
+ dna,acc,don = seqInfo.get_seq_and_scores(3,'+',0,1369)
+ dna,acc,don = seqInfo.get_seq_and_scores(4,'+',0,1369)
+ dna,acc,don = seqInfo.get_seq_and_scores(5,'+',0,1369)
+ dna,acc,don = seqInfo.get_seq_and_scores(1,'-',0,1369)
+ dna,acc,don = seqInfo.get_seq_and_scores(2,'-',0,1369)
+ dna,acc,don = seqInfo.get_seq_and_scores(3,'-',0,1369)
+ dna,acc,don = seqInfo.get_seq_and_scores(4,'-',0,1369)
+ dna,acc,don = seqInfo.get_seq_and_scores(5,'-',0,1369)
+
+
+ def testLyrataData(self):
+ # Query the interval 0..1369 on contigs 1-5 and 45, both strands.
+ g_dir = '/fml/ag-raetsch/home/fabio/tmp/Lyrata/contigs'
+ acc_dir = '/fml/ag-raetsch/home/fabio/tmp/Lyrata/splice_scores/acc'
+ don_dir = '/fml/ag-raetsch/home/fabio/tmp/Lyrata/splice_scores/don'
+
+ g_fmt = 'contig%d.dna.flat'
+ s_fmt = 'contig_%d%s'
+
+ # range(1,num_chromo) excludes num_chromo itself, i.e. ids 1..1098.
+ num_chromo = 1099
+
+ accessWrapper = DataAccessWrapper(g_dir,acc_dir,don_dir,g_fmt,s_fmt)
+ seqInfo = SeqSpliceInfo(accessWrapper,range(1,num_chromo))
+
+ dna,acc,don = seqInfo.get_seq_and_scores(1,'+',0,1369)
+ dna,acc,don = seqInfo.get_seq_and_scores(2,'+',0,1369)
+ dna,acc,don = seqInfo.get_seq_and_scores(3,'+',0,1369)
+ dna,acc,don = seqInfo.get_seq_and_scores(4,'+',0,1369)
+ dna,acc,don = seqInfo.get_seq_and_scores(5,'+',0,1369)
+ dna,acc,don = seqInfo.get_seq_and_scores(1,'-',0,1369)
+ dna,acc,don = seqInfo.get_seq_and_scores(2,'-',0,1369)
+ dna,acc,don = seqInfo.get_seq_and_scores(3,'-',0,1369)
+ dna,acc,don = seqInfo.get_seq_and_scores(4,'-',0,1369)
+ dna,acc,don = seqInfo.get_seq_and_scores(5,'-',0,1369)
+ dna,acc,don = seqInfo.get_seq_and_scores(45,'-',0,1369)
+ dna,acc,don = seqInfo.get_seq_and_scores(45,'+',0,1369)
+
+ print 'Finished'
+ # NOTE(review): the disabled block below refers to `random` and `b`,
+ # neither of which is defined in the code shown; `b` is presumably
+ # meant to be `begin` -- confirm before re-enabling.
+ #num_tests = 10
+ #for chromo in range(1,6):
+ # for strand in ['+','-']:
+ # for test_idx in range(num_tests):
+ # if strand == '-':
+ # size = seqInfo.chromo_sizes[chromo+7]
+ # else:
+ # size = seqInfo.chromo_sizes[chromo]
+ # begin = random.randint(0,size)
+ # end = random.randint(b,size)
+ # dna,acc,don = seqInfo.get_seq_and_scores(chromo,strand,begin,end)
+
+
+ def tearDown(self):
+ pass
+
+
+class TestLookupTable(unittest.TestCase):
+ # Construction-only tests for LookupTable: each test builds one table
+ # from absolute paths under /fml/ag-raetsch and makes no assertions, so
+ # it passes as long as the constructor does not raise.
+
+ def setUp(self):
+ pass
+
+
+ def testTableThalianaData(self):
+ g_dir = '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome/'
+ acc_dir = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/acc'
+ don_dir = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/don'
+
+ g_fmt = 'chr%d.dna.flat'
+ s_fmt = 'contig_%d%s'
+
+ # NOTE(review): ids 1..99 are requested here, whereas
+ # TestSequenceUtils.testThalianaData uses ids 1..5 for the same
+ # genome directory -- confirm that this range is intended.
+ lt1 = LookupTable(g_dir,acc_dir,don_dir,g_fmt,s_fmt,range(1,100))
+
+
+ def testTableLyrataData(self):
+ g_dir = '/fml/ag-raetsch/home/fabio/tmp/Lyrata/contigs'
+ acc_dir = '/fml/ag-raetsch/home/fabio/tmp/Lyrata/splice_scores/acc'
+ don_dir = '/fml/ag-raetsch/home/fabio/tmp/Lyrata/splice_scores/don'
+
+ g_fmt = 'contig%d.dna.flat'
+ s_fmt = 'contig_%d%s'
+
+ # The table is built for ids 1..99; the result is not inspected.
+ lt1 = LookupTable(g_dir,acc_dir,don_dir,g_fmt,s_fmt,range(1,100))
+
+
+ def tearDown(self):
+ pass
+
+
+def check_wrapper():
+    """Check that the files DataAccessWrapper resolves for Lyrata exist.
+
+    For contig ids 1..99 the genomic fragment file names (both strands)
+    and the acceptor/donor splice site score file names (both strands)
+    are printed and asserted to exist on disk.
+
+    Raises:
+        AssertionError: if any resolved file name does not exist.
+    """
+    # Imported locally because the module-level imports shown for this
+    # file do not provide `os`, which os.path.exists below requires.
+    import os
+
+    g_dir = '/fml/ag-raetsch/home/fabio/tmp/Lyrata/contigs'
+    acc_dir = '/fml/ag-raetsch/home/fabio/tmp/Lyrata/splice_scores/acc'
+    don_dir = '/fml/ag-raetsch/home/fabio/tmp/Lyrata/splice_scores/don'
+
+    g_fmt = 'contig%d.dna.flat'
+    s_fmt = 'contig_%d%s.Conf_cum'
+
+    test = DataAccessWrapper(g_dir,acc_dir,don_dir,g_fmt,s_fmt)
+
+    for idx in range(1,100):
+        # Genomic fragment files for the forward and reverse strand.
+        pos = test.get_genomic_fragment_fn(idx,'+')
+        neg = test.get_genomic_fragment_fn(idx,'-')
+        print pos,neg
+        assert os.path.exists(pos)
+        assert os.path.exists(neg)
+
+        # Acceptor/donor score files, forward strand first, then reverse.
+        for strand in ('+','-'):
+            acc_fn,don_fn = test.get_splice_site_scores_fn(idx,strand)
+            print acc_fn,don_fn
+            assert os.path.exists(acc_fn)
+            assert os.path.exists(don_fn)
- #begin = 0
- #end = 60
- #dna,acc,don = qpalma.sequence_utils.get_seq_and_scores(1,'+',begin,end,flat_files,True)
- #dna = qpalma.sequence_utils.get_seq_and_scores(1,'+',begin,end,flat_files,True)
- #print dna
def check_positions(dna,acc,don,offset=0):
+ # Collects every index p with 0 < p < len(dna)-1 where dna[p] is 'g' and
+ # the following character is 't' or 'c', then keeps only the first
+ # `offset` of them; with the default offset=0 the slice is empty.
+ # NOTE(review): in the lines shown here the list is only bound to a
+ # local name (nothing is returned) and acc/don are not used.
first_gt_tuple_pos = [p for p,e in enumerate(dna) if p>0 and p<len(dna)-1 and e=='g' and (dna[p+1]=='t' or dna[p+1]=='c')][:offset]
def run4():
+ # Fetches the interval 200..1200 of chromosome 3 (forward strand) and
+ # passes the result to check_positions, then prints 'fine'.
flat_files = '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome/'
- chromo = 8
- strand = '-'
+ # NOTE(review): SeqSpliceInfo receives a path string here, whereas the
+ # unittest classes in this file pass a DataAccessWrapper as the first
+ # argument -- confirm which form the constructor expects.
+ seqInfo = SeqSpliceInfo(flat_files,range(1,6))
+ chromo = 3
+ strand = '+'
begin = 200
end = 1200
- dna,acc,don = qpalma.sequence_utils.get_seq_and_scores(chromo,strand,begin,end,flat_files)
+ dna,acc,don = seqInfo.get_seq_and_scores(chromo,strand,begin,end)
+ # check_positions is called twice with identical arguments.
check_positions(dna,acc,don)
check_positions(dna,acc,don)
print 'fine'
+
if __name__ == '__main__':
+ # Script entry point: the older run*() helpers stay disabled and only
+ # the TestSequenceUtils cases are loaded and run (verbosity 2);
+ # TestLookupTable is not executed from here.
#run()
#run2()
#run3(
- run4()
+ #run4()
+ #perform_checks()
+ suite = unittest.TestLoader().loadTestsFromTestCase(TestSequenceUtils)
+ unittest.TextTestRunner(verbosity=2).run(suite)
+