+ added some testcases
author fabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Wed, 6 Aug 2008 15:21:08 +0000 (15:21 +0000)
committer fabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Wed, 6 Aug 2008 15:21:08 +0000 (15:21 +0000)
git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@10342 e1793c9e-67f9-0310-80fc-b846ff1f7b36

tests/test_qpalma_prediction.py [new file with mode: 0644]
tests/test_sequence_utils.py

diff --git a/tests/test_qpalma_prediction.py b/tests/test_qpalma_prediction.py
new file mode 100644 (file)
index 0000000..3b9c759
--- /dev/null
@@ -0,0 +1,85 @@
+#!/usr/bin/env python 
+# -*- coding: utf-8 -*- 
+
+import pdb
+import unittest
+import numpy
+import os.path
+import cPickle
+
+from qpalma_main import QPalma
+from Utils import pprint_alignment
+from qpalma.sequence_utils import alphabet
+
+jp = os.path.join   # short alias, used below to build the pickle file paths
+
+
+class TestQPalmaPrediction(unittest.TestCase):
+   """
+   Smoke test: runs QPalma.predict on a single hand-built read and prints the resulting alignments.
+   """
+
+   def setUp(self):   # builds a prediction set that contains exactly one example
+      self.prediction_set = {}   # maps an example id to a list of (seqInfo, read, qualities) tuples
+
+      read = 'catctcacagtcttcttcttcttctcgattgcagtagc'      
+      currentQualities = [[40]*len(read)]   # one quality vector with the constant value 40 per read position
+
+      id       = 1   # NOTE(review): shadows the builtin id() inside this method
+      chromo   = 3
+      strand   = '-'
+
+      tsize = 23470805   # presumably the length of chromosome 3 -- TODO confirm
+
+      genomicSeq_start  = tsize - 2048   # the interval is expressed relative to tsize
+      genomicSeq_stop   = tsize - 1990
+
+      currentSeqInfo = id,chromo,strand,genomicSeq_start,genomicSeq_stop
+
+      example = (currentSeqInfo,read,currentQualities)
+
+      self.prediction_set[id] = [example]
+
+
+   
+   def testAlignments(self):   # contains no assertions: it passes as long as nothing below raises
+      run_dir = '/fml/ag-raetsch/home/fabio/tmp/newest_run/alignment/saved_run'   # NOTE(review): absolute path; the saved run must exist on the machine
+
+      run   = cPickle.load(open(jp(run_dir,'run_obj.pickle')))   # NOTE(review): the file object is never closed explicitly
+      run['name'] = 'test_run'   # override name and result dir of the saved run for this test
+      run['result_dir']    = '/fml/ag-raetsch/home/fabio/tmp/sandbox/testcases'
+
+      param_fn = jp(run_dir,'param_526.pickle')
+      param = cPickle.load(open(param_fn))   # NOTE(review): same unclosed-file pattern as above
+      # Debug output: dump the whole prediction set, then every example and its tuple length.
+      print self.prediction_set
+      for example_key in self.prediction_set.keys():
+         print 'Current example %d' % example_key
+
+         for example in self.prediction_set[example_key]:
+            print example
+            print 'size'
+            print len(example)
+
+      qp = QPalma(True)
+      #qp.init_prediction(run,set_name)
+      allPredictions = qp.predict(run,self.prediction_set,param)
+      for elem in allPredictions:   # each element is indexed by the string keys used below
+         dna_array   = elem['dna_array']
+         read_array  = elem['read_array']
+
+         dna   = map(lambda x: alphabet[x],dna_array)   # look every array entry up in the imported alphabet
+         read  = map(lambda x: alphabet[x],read_array)
+
+         spliceAlign = elem['spliceAlign']
+         estAlign    = elem['estAlign']
+
+         line1,line2,line3 = pprint_alignment(spliceAlign, estAlign, dna, read)   # three values are returned and printed one per line
+         print line1
+         print line2
+         print line3
+
+
+if __name__ == '__main__':   # when run as a script, execute only TestQPalmaPrediction
+   suite = unittest.TestLoader().loadTestsFromTestCase(TestQPalmaPrediction)
+   unittest.TextTestRunner(verbosity=2).run(suite)   # verbosity=2 requests the verbose per-test report
diff --git a/tests/test_sequence_utils.py b/tests/test_sequence_utils.py
index bd8d5dd..b83c40a 100644 (file)
@@ -14,8 +14,6 @@ class TestSequenceUtils(unittest.TestCase):
    def setUp(self):
       self.strands = ['+','']
 
-
-   def testThalianaData(self):
       g_dir    = '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome/'
       acc_dir  = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/acc'
       don_dir  = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/don'
@@ -26,28 +24,115 @@ class TestSequenceUtils(unittest.TestCase):
       num_chromo = 6
 
       accessWrapper = DataAccessWrapper(g_dir,acc_dir,don_dir,g_fmt,s_fmt)
-      seqInfo = SeqSpliceInfo(accessWrapper,range(1,num_chromo))
-   
-      #for chromo in range(1,num_chromo):
-      #   dna,acc,don = seqInfo.get_seq_and_scores(chromo,'+',0,1369)
-      #   dna_,acc_,don_ = seqInfo.get_seq_and_scores(chromo,'-',0,1369)
-
-      #   self.assertEqual(len(dna),len(dna_))
-      #   self.assertEqual(dna,dna_)
-
-      dna,acc,don = seqInfo.get_seq_and_scores(1,'+',0,1369)
-      dna,acc,don = seqInfo.get_seq_and_scores(2,'+',0,1369)
-      dna,acc,don = seqInfo.get_seq_and_scores(3,'+',0,1369)
-      dna,acc,don = seqInfo.get_seq_and_scores(4,'+',0,1369)
-      dna,acc,don = seqInfo.get_seq_and_scores(5,'+',0,1369)
-      dna,acc,don = seqInfo.get_seq_and_scores(1,'-',0,1369)
-      dna,acc,don = seqInfo.get_seq_and_scores(2,'-',0,1369)
-      dna,acc,don = seqInfo.get_seq_and_scores(3,'-',0,1369)
-      dna,acc,don = seqInfo.get_seq_and_scores(4,'-',0,1369)
-      dna,acc,don = seqInfo.get_seq_and_scores(5,'-',0,1369)
-
-
-   def testLyrataData(self):
+      self.seqInfo = SeqSpliceInfo(accessWrapper,range(1,num_chromo))
+
+      #self.lt1 = LookupTable(g_dir,acc_dir,don_dir,g_fmt,s_fmt,range(1,2))
+
+
+   def testThalianaDataExamples(self):   # spot-checks 38-base windows fetched via self.seqInfo against fixed sequences
+      seq = 'TGAAAACAGGAACGGATTGGAGAAAGGCGTCTCGTCAT'.lower()   # expected sequence, lower-cased before the comparison
+      chromo = 3
+      strand = '+'
+      pos = 19246391
+      # With True as fifth argument the result is used as one value and compared to the sequence directly.
+      dna = self.seqInfo.get_seq_and_scores(chromo,strand,pos,pos+38,True)
+      self.assertEqual(seq,dna)
+      
+      seq = 'AGGCAATGAAACTGATGCATTGGACTTGACGGGTGTTG'.lower()
+      chromo = 5
+      strand = '+'
+      pos = 15394760
+      dna = self.seqInfo.get_seq_and_scores(chromo,strand,pos,pos+38,True)
+      self.assertEqual(seq,dna)
+
+      seq = 'TCTTGGTGGAGGAGCTAACACCGTAGCTGACGGTTACA'.lower()
+      chromo = 4
+      strand = '+'
+      pos = 16709206
+      dna = self.seqInfo.get_seq_and_scores(chromo,strand,pos,pos+38,True)
+      self.assertEqual(seq,dna)
+
+      seq = 'TTGGAAGACAGAGTCAACCATACCCTTGCCTCTGGTGA'.lower()
+      chromo = 2
+      strand = '+'
+      pos = 16579649
+      dna = self.seqInfo.get_seq_and_scores(chromo,strand,pos,pos+38,True)
+      self.assertEqual(seq,dna)
+      # From here on the '-' strand is queried; the expected value is the reverse complement of the literal.
+      seq = 'CTGGCCAAAAGCTCAGGGAAGACGCAGCCTAGGGCTCC'.lower()
+      seq = reverse_complement(seq)
+      chromo = 1
+      strand = '-'
+      pos = 10475515
+      dna = self.seqInfo.get_seq_and_scores(chromo,strand,pos,pos+38,True)
+      self.assertEqual(seq,dna)
+
+      seq = 'TTTTTCCCTTCTAGAAGACCGTAAAGGTAAACTTCTAA'.lower()
+      seq = reverse_complement(seq)
+      chromo = 3
+      strand = '-'
+      pos = 17143420
+      dna = self.seqInfo.get_seq_and_scores(chromo,strand,pos,pos+38,True)
+      self.assertEqual(seq,dna)
+
+      seq = 'CACGGTGCAGATGAAGAACTGAGATCCGTTCGTGTTTG'.lower()
+      seq = reverse_complement(seq)
+      chromo = 4
+      strand = '-'
+      pos = 18083761
+      dna = self.seqInfo.get_seq_and_scores(chromo,strand,pos,pos+38,True)
+      self.assertEqual(seq,dna)
+      # Same read as the previous case, now fetched with `window` extra positions on both sides.
+      window = 113
+      seq = 'CACGGTGCAGATGAAGAACTGAGATCCGTTCGTGTTTG'.lower()
+      seq = reverse_complement(seq)
+      chromo = 4
+      strand = '-'
+      pos = 18083761-window   # start shifted left by the flank; the end below grows by 2*window
+      dna,acc,don = self.seqInfo.get_seq_and_scores(chromo,strand,pos,pos+38+2*window,False)   # with False three values are unpacked
+      self.assertEqual(seq,dna[window:-window])   # strip both flanks before comparing
+
+      #print dna
+      #print "".join(map(lambda x: ['_','x'][x!=-numpy.inf],acc))
+      #print "".join(map(lambda x: ['_','x'][x!=-numpy.inf],don))
+
+
+   def _testThalianaDataGeneric(self):   # leading underscore: not matched by unittest's default 'test' method prefix, so it is not run
+      # Compares self.seqInfo with self.lt1 on chromosome 1, interval 1000-1369, for both strands.
+      dna,acc,don = self.seqInfo.get_seq_and_scores(1,'+',1000,1369)
+      dna_,acc_,don_ = self.lt1.get_seq_and_scores(1,'+',1000,1369,'')   # NOTE(review): self.lt1 is not assigned in the visible part of setUp (the LookupTable line there is commented out) -- verify before re-enabling
+
+      self.assertEqual(dna,dna_)
+      self.assertEqual(acc,acc_)
+      self.assertEqual(don,don_)
+
+      dna,acc,don = self.seqInfo.get_seq_and_scores(1,'-',1000,1369)
+      dna_,acc_,don_ = self.lt1.get_seq_and_scores(1,'-',1000,1369,'')
+
+      self.assertEqual(dna,dna_)
+      self.assertEqual(acc,acc_)
+      self.assertEqual(don,don_)
+
+      #dna,acc,don = seqInfo.get_seq_and_scores(2,'+',1000,1369)
+      #dna,acc,don = seqInfo.get_seq_and_scores(3,'+',1000,1369)
+      #dna,acc,don = seqInfo.get_seq_and_scores(4,'+',1000,1369)
+      #dna,acc,don = seqInfo.get_seq_and_scores(5,'+',1000,1369)
+
+      #dna,acc,don = seqInfo.get_seq_and_scores(1,'-',1000,1369)
+      #dna_,acc_,don_ = lt1.get_seq_and_scores(1,'-',1000,1369,'')
+
+      #self.assertEqual(dna,dna_)
+      #self.assertEqual(acc,acc_)
+      #self.assertEqual(don,don_)
+
+
+      #dna,acc,don = seqInfo.get_seq_and_scores(2,'-',1000,1369)
+      #dna,acc,don = seqInfo.get_seq_and_scores(3,'-',1000,1369)
+      #dna,acc,don = seqInfo.get_seq_and_scores(4,'-',1000,1369)
+      #dna,acc,don = seqInfo.get_seq_and_scores(5,'-',1000,1369)
+
+
+   def _testLyrataData(self):
       g_dir    = '/fml/ag-raetsch/home/fabio/tmp/Lyrata/contigs'
       acc_dir  = '/fml/ag-raetsch/home/fabio/tmp/Lyrata/splice_scores/acc'
       don_dir  = '/fml/ag-raetsch/home/fabio/tmp/Lyrata/splice_scores/don'
@@ -60,18 +145,18 @@ class TestSequenceUtils(unittest.TestCase):
       accessWrapper = DataAccessWrapper(g_dir,acc_dir,don_dir,g_fmt,s_fmt)
       seqInfo = SeqSpliceInfo(accessWrapper,range(1,num_chromo))
 
-      dna,acc,don = seqInfo.get_seq_and_scores(1,'+',0,1369)
-      dna,acc,don = seqInfo.get_seq_and_scores(2,'+',0,1369)
-      dna,acc,don = seqInfo.get_seq_and_scores(3,'+',0,1369)
-      dna,acc,don = seqInfo.get_seq_and_scores(4,'+',0,1369)
-      dna,acc,don = seqInfo.get_seq_and_scores(5,'+',0,1369)
-      dna,acc,don = seqInfo.get_seq_and_scores(1,'-',0,1369)
-      dna,acc,don = seqInfo.get_seq_and_scores(2,'-',0,1369)
-      dna,acc,don = seqInfo.get_seq_and_scores(3,'-',0,1369)
-      dna,acc,don = seqInfo.get_seq_and_scores(4,'-',0,1369)
-      dna,acc,don = seqInfo.get_seq_and_scores(5,'-',0,1369)
-      dna,acc,don = seqInfo.get_seq_and_scores(45,'-',0,1369)
-      dna,acc,don = seqInfo.get_seq_and_scores(45,'+',0,1369)
+      dna,acc,don = seqInfo.get_seq_and_scores(1,'+',1,1369)
+      dna,acc,don = seqInfo.get_seq_and_scores(2,'+',1,1369)
+      dna,acc,don = seqInfo.get_seq_and_scores(3,'+',1,1369)
+      dna,acc,don = seqInfo.get_seq_and_scores(4,'+',1,1369)
+      dna,acc,don = seqInfo.get_seq_and_scores(5,'+',1,1369)
+      dna,acc,don = seqInfo.get_seq_and_scores(1,'-',1,1369)
+      dna,acc,don = seqInfo.get_seq_and_scores(2,'-',1,1369)
+      dna,acc,don = seqInfo.get_seq_and_scores(3,'-',1,1369)
+      dna,acc,don = seqInfo.get_seq_and_scores(4,'-',1,1369)
+      dna,acc,don = seqInfo.get_seq_and_scores(5,'-',1,1369)
+      dna,acc,don = seqInfo.get_seq_and_scores(45,'-',1,1369)
+      dna,acc,don = seqInfo.get_seq_and_scores(45,'+',1,1369)
 
       print 'Finished'
       #num_tests = 10
@@ -97,7 +182,7 @@ class TestLookupTable(unittest.TestCase):
       pass
 
 
-   def testTableThalianaData(self):
+   def _testTableThalianaData(self):
       g_dir    = '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome/'
       acc_dir  = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/acc'
       don_dir  = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/don'
@@ -116,7 +201,7 @@ class TestLookupTable(unittest.TestCase):
       g_fmt = 'contig%d.dna.flat'
       s_fmt = 'contig_%d%s'
 
-      lt1 = LookupTable(g_dir,acc_dir,don_dir,g_fmt,s_fmt,range(1,100))
+      lt1 = LookupTable(g_dir,acc_dir,don_dir,g_fmt,s_fmt,range(0,1099))
 
 
    def tearDown(self):
@@ -150,112 +235,7 @@ def check_wrapper():
       assert os.path.exists(don_fn)
 
 
-def check_positions(dna,acc,don,offset=0):
-   first_gt_tuple_pos = [p for p,e in enumerate(dna) if p>0 and p<len(dna)-1 and e=='g' and (dna[p+1]=='t' or dna[p+1]=='c')][:offset]
-   first_ag_tuple_pos = [p for p,e in enumerate(dna) if p>1 and p<len(dna) and dna[p-1]=='a' and dna[p]=='g'][:offset]
-
-   first_acc_scores = [p for p,e in enumerate(acc) if e != -numpy.inf][:offset]
-   first_don_scores = [p for p,e in enumerate(don) if e != -numpy.inf][:offset]
-
-   last_gt_tuple_pos = [p for p,e in enumerate(dna) if p>0 and p<len(dna)-1 and e=='g' and (dna[p+1]=='t' or dna[p+1]=='c')][-offset:]
-   last_ag_tuple_pos = [p for p,e in enumerate(dna) if p>1 and p<len(dna) and dna[p-1]=='a' and dna[p]=='g'][-offset:]
-
-   last_acc_scores = [p for p,e in enumerate(acc) if e != -numpy.inf][-offset:]
-   last_don_scores = [p for p,e in enumerate(don) if e != -numpy.inf][-offset:]
-
-   assert first_gt_tuple_pos == first_don_scores
-   assert first_ag_tuple_pos == first_acc_scores
-
-   assert last_gt_tuple_pos == last_don_scores
-   assert last_ag_tuple_pos == last_acc_scores
-
-
-def run():
-   flat_files =  '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome/'
-   chromo   = 1
-   strand   = '+'
-   begin    = 0
-   end      = 100000000
-   dna,acc,don = qpalma.sequence_utils.get_seq_and_scores(chromo,strand,begin,end,flat_files)
-
-   check_positions(dna,acc,don,100)
-
-
-def run2():
-   flat_files =  '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome/'
-   chromo   = 8
-   strand   = '-'
-   begin    = 0
-   end      = 100000000
-   dna,acc,don = qpalma.sequence_utils.get_seq_and_scores(chromo,strand,begin,end,flat_files)
-
-   check_positions(dna,acc,don,100)
-
-   pdb.set_trace()
-
-
-def run3():
-   flat_files =  '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome/'
-   
-   begin = 0
-   end   = 100000000
-   full_pos_dna = qpalma.sequence_utils.get_seq_and_scores(1,'+',begin,end,flat_files,True)
-
-   begin = 0
-   end   = 500
-   dna = qpalma.sequence_utils.get_seq_and_scores(8,'+',begin,end,flat_files,True)
-
-   pos_dna = full_pos_dna[-500:]
-   r_dna = qpalma.sequence_utils.reverse_complement(dna)
-   pos_dna == r_dna
-   pdb.set_trace()
-   
-def run4():
-   flat_files =  '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome/'
-   seqInfo = SeqSpliceInfo(flat_files,range(1,6))
-   chromo   = 3
-   strand   = '+'
-   begin    = 200
-   end      = 1200
-   dna,acc,don = seqInfo.get_seq_and_scores(chromo,strand,begin,end)
-
-   check_positions(dna,acc,don)
-
-   pdb.set_trace()
-
-
-def check_negative_strand(b,e):
-   flat_files =  '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome/'
-   chromo   = 8
-   strand   = '-'
-   begin    = b
-   end      = e
-   dna,acc,don = qpalma.sequence_utils.get_seq_and_scores(chromo,strand,begin,end,flat_files)
-
-   pdb.set_trace()
-
-   check_positions(dna,acc,don)
-
-   print 'fine'
-
-def check_positive_strand(b,e):
-   flat_files =  '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome/'
-   chromo   = 1
-   strand   = '+'
-   begin    = b
-   end      = e
-   dna,acc,don = qpalma.sequence_utils.get_seq_and_scores(chromo,strand,begin,end,flat_files)
-
-   check_positions(dna,acc,don)
-   print 'fine'
-
-
 if __name__ == '__main__':
-   #run()
-   #run2()
-   #run3(
-   #run4()
-   #perform_checks()
    suite = unittest.TestLoader().loadTestsFromTestCase(TestSequenceUtils)
+   #suite = unittest.TestLoader().loadTestsFromTestCase(TestLookupTable)
    unittest.TextTestRunner(verbosity=2).run(suite)
-