+ added some testcases for sequence_utils module
author fabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Tue, 29 Jul 2008 17:36:12 +0000 (17:36 +0000)
committer fabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Tue, 29 Jul 2008 17:36:12 +0000 (17:36 +0000)
git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@10220 e1793c9e-67f9-0310-80fc-b846ff1f7b36

tests/test_sequence_utils.py

index c2a3163..bd8d5dd 100644 (file)
 # -*- coding: utf-8 -*- 
 
 import pdb
+import unittest
 import numpy
-import qpalma.sequence_utils
 
-   #flat_files = '/fml/ag-raetsch/home/fabio/svn/projects/QPalma/tests/test_data'
+from qpalma.sequence_utils import SeqSpliceInfo,DataAccessWrapper,reverse_complement
+from qpalma.Lookup import LookupTable
+
+class TestSequenceUtils(unittest.TestCase):
+   """
+   Smoke tests for SeqSpliceInfo.get_seq_and_scores on the A. thaliana and
+   Lyrata data sets.
+
+   Each query result is unpacked into a (dna, acc, don) triple; the values
+   themselves are not inspected, so a test only fails if a call raises or
+   does not return three items. All data locations are hard-coded absolute
+   paths.
+   """
+
+   def setUp(self):
+      # NOTE(review): the second entry is an empty string, while the tests
+      # below query the '-' strand. This attribute is not read anywhere in
+      # this class -- confirm whether '-' was intended.
+      self.strands = ['+','']
+
+
+   def tearDown(self):
+      pass
+
+
+   def testThalianaData(self):
+      """
+      Query the interval (0, 1369) on chromosomes 1..5, first on the '+'
+      strand and then on the '-' strand.
+      """
+      genome_dir = '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome/'
+      acc_scores = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/acc'
+      don_scores = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/don'
+
+      wrapper = DataAccessWrapper(genome_dir,acc_scores,don_scores,'chr%d.dna.flat','contig_%d%s')
+      seqInfo = SeqSpliceInfo(wrapper,range(1,6))
+
+      # Disabled strand-comparison check, kept for reference:
+      #for chromo in range(1,num_chromo):
+      #   dna,acc,don = seqInfo.get_seq_and_scores(chromo,'+',0,1369)
+      #   dna_,acc_,don_ = seqInfo.get_seq_and_scores(chromo,'-',0,1369)
+
+      #   self.assertEqual(len(dna),len(dna_))
+      #   self.assertEqual(dna,dna_)
+
+      # All '+' queries run before any '-' query.
+      for strand in ('+','-'):
+         for chromo in range(1,6):
+            dna,acc,don = seqInfo.get_seq_and_scores(chromo,strand,0,1369)
+
+
+   def testLyrataData(self):
+      """
+      Query the interval (0, 1369) on contigs 1..5 for both strands, then on
+      contig 45 ('-' first, then '+'), and print 'Finished'.
+      """
+      contig_dir = '/fml/ag-raetsch/home/fabio/tmp/Lyrata/contigs'
+      acc_scores = '/fml/ag-raetsch/home/fabio/tmp/Lyrata/splice_scores/acc'
+      don_scores = '/fml/ag-raetsch/home/fabio/tmp/Lyrata/splice_scores/don'
+
+      wrapper = DataAccessWrapper(contig_dir,acc_scores,don_scores,'contig%d.dna.flat','contig_%d%s')
+
+      # Contig ids 1..1098 are handed to SeqSpliceInfo.
+      seqInfo = SeqSpliceInfo(wrapper,range(1,1099))
+
+      for strand in ('+','-'):
+         for contig in range(1,6):
+            dna,acc,don = seqInfo.get_seq_and_scores(contig,strand,0,1369)
+
+      for strand in ('-','+'):
+         dna,acc,don = seqInfo.get_seq_and_scores(45,strand,0,1369)
+
+      print 'Finished'
+
+      # Disabled randomized-interval check, kept for reference.
+      # NOTE(review): the sketch uses `b` (never assigned; presumably `begin`)
+      # and `random`, which is not among the imports visible in this file.
+      #num_tests = 10
+      #for chromo in range(1,6):
+      #   for strand in ['+','-']:
+      #      for test_idx in range(num_tests):
+      #         if strand == '-':
+      #            size = seqInfo.chromo_sizes[chromo+7]
+      #         else:
+      #            size = seqInfo.chromo_sizes[chromo]
+      #         begin = random.randint(0,size)
+      #         end   = random.randint(b,size)
+      #         dna,acc,don = seqInfo.get_seq_and_scores(chromo,strand,begin,end)
+
+
+class TestLookupTable(unittest.TestCase):
+   """
+   Construction tests for LookupTable.
+
+   Each test builds one LookupTable for ids 1..99 from hard-coded absolute
+   paths and passes as long as the constructor does not raise.
+   """
+
+   def setUp(self):
+      pass
+
+
+   def tearDown(self):
+      pass
+
+
+   def testTableThalianaData(self):
+      """Build a LookupTable from the A. thaliana genome and score files."""
+      genome_dir = '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome/'
+      acc_scores = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/acc'
+      don_scores = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/don'
+
+      table = LookupTable(genome_dir,acc_scores,don_scores,'chr%d.dna.flat','contig_%d%s',range(1,100))
+
+
+   def testTableLyrataData(self):
+      """Build a LookupTable from the Lyrata contig and score files."""
+      contig_dir = '/fml/ag-raetsch/home/fabio/tmp/Lyrata/contigs'
+      acc_scores = '/fml/ag-raetsch/home/fabio/tmp/Lyrata/splice_scores/acc'
+      don_scores = '/fml/ag-raetsch/home/fabio/tmp/Lyrata/splice_scores/don'
+
+      table = LookupTable(contig_dir,acc_scores,don_scores,'contig%d.dna.flat','contig_%d%s',range(1,100))
+
+
+def check_wrapper():
+   """
+   Check that the file names produced by DataAccessWrapper exist on disk.
+
+   For ids 1..99 of the Lyrata data set, the genomic fragment file and the
+   acceptor/donor splice site score files are looked up for both strands.
+   Every file name is printed, and an AssertionError is raised for the first
+   one that does not exist.
+   """
+   # Imported locally because `os` is not among the module-level imports;
+   # without it the first existence check raises NameError.
+   import os
+
+   g_dir    = '/fml/ag-raetsch/home/fabio/tmp/Lyrata/contigs'
+   acc_dir  = '/fml/ag-raetsch/home/fabio/tmp/Lyrata/splice_scores/acc'
+   don_dir  = '/fml/ag-raetsch/home/fabio/tmp/Lyrata/splice_scores/don'
+
+   g_fmt = 'contig%d.dna.flat'
+   s_fmt = 'contig_%d%s.Conf_cum'
+
+   test = DataAccessWrapper(g_dir,acc_dir,don_dir,g_fmt,s_fmt)
+
+   for idx in range(1,100):
+      # Genomic fragment files: both names are resolved and printed before
+      # either one is checked.
+      pos = test.get_genomic_fragment_fn(idx,'+')
+      neg = test.get_genomic_fragment_fn(idx,'-')
+      print pos,neg
+      assert os.path.exists(pos)
+      assert os.path.exists(neg)
+
+      # Splice site score files, '+' strand first, then '-'.
+      for strand in ('+','-'):
+         acc_fn,don_fn = test.get_splice_site_scores_fn(idx,strand)
+         print acc_fn,don_fn
+         assert os.path.exists(acc_fn)
+         assert os.path.exists(don_fn)
 
-   #begin =   0
-   #end   =  60
-   #dna,acc,don = qpalma.sequence_utils.get_seq_and_scores(1,'+',begin,end,flat_files,True)
-   #dna = qpalma.sequence_utils.get_seq_and_scores(1,'+',begin,end,flat_files,True)
-   #print dna
 
 def check_positions(dna,acc,don,offset=0):
    first_gt_tuple_pos = [p for p,e in enumerate(dna) if p>0 and p<len(dna)-1 and e=='g' and (dna[p+1]=='t' or dna[p+1]=='c')][:offset]
@@ -75,11 +212,12 @@ def run3():
    
 def run4():
+   # Fetch the interval 200..1200 of chromosome 3 on the '+' strand and pass
+   # the resulting dna/acc/don triple to check_positions.
    flat_files =  '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome/'
-   chromo   = 8
-   strand   = '-'
+   # NOTE(review): SeqSpliceInfo is given the genome directory string here,
+   # whereas the TestSequenceUtils tests in this file pass a
+   # DataAccessWrapper as the first argument -- confirm which is expected.
+   seqInfo = SeqSpliceInfo(flat_files,range(1,6))
+   chromo   = 3
+   strand   = '+'
    begin    = 200
    end      = 1200
-   dna,acc,don = qpalma.sequence_utils.get_seq_and_scores(chromo,strand,begin,end,flat_files)
+   dna,acc,don = seqInfo.get_seq_and_scores(chromo,strand,begin,end)
 
    check_positions(dna,acc,don)
 
@@ -111,8 +249,13 @@ def check_positive_strand(b,e):
    check_positions(dna,acc,don)
    print 'fine'
 
+
 if __name__ == '__main__':
    #run()
    #run2()
    #run3(
-   run4()
+   #run4()
+   #perform_checks()
+   # Script entry point: load the tests of TestSequenceUtils and run them
+   # with verbose output.
+   # NOTE(review): TestLookupTable is defined in this file but is not loaded
+   # here -- confirm that leaving it out of the run is intended.
+   suite = unittest.TestLoader().loadTestsFromTestCase(TestSequenceUtils)
+   unittest.TextTestRunner(verbosity=2).run(suite)
+