+ added license text
[qpalma.git] / tests / test_qpalma_prediction.py
index 3b9c759..f1211d3 100644 (file)
@@ -1,15 +1,19 @@
 #!/usr/bin/env python 
 # -*- coding: utf-8 -*- 
 
-import pdb
-import unittest
+import cPickle
+import math
 import numpy
 import os.path
-import cPickle
+import pdb
+import unittest
 
 from qpalma_main import QPalma
-from Utils import pprint_alignment
-from qpalma.sequence_utils import alphabet
+from Utils import print_prediction
+
+from createAlignmentFileFromPrediction import alignment_reconstruct
+
+from qpalma.sequence_utils import DataAccessWrapper,SeqSpliceInfo
 
 jp = os.path.join
 
@@ -19,11 +23,22 @@ class TestQPalmaPrediction(unittest.TestCase):
    This class...
    """
 
+   def _setUp(self):
+      data_fn = '/fml/ag-raetsch/home/fabio/tmp/vmatch_evaluation/spliced_1/dataset/prediction_set.pickle'
+
+      self.prediction_set = cPickle.load(open(data_fn))
+
+      
+
    def setUp(self):
+      print
       self.prediction_set = {}
 
-      read = 'catctcacagtcttcttcttcttctcgattgcagtagc'      
-      currentQualities = [[40]*len(read)]
+      #  xxxxxxxxxxx_______________________________________________________________________xxxxxxxxxxxxxxxxxxxxxxxxxxx
+      # 'tattttggaaggtatttcatctctccgacattgctctcaacactgtcccctccaatgcctagtccttttatttttttcttagttccaattcccttaaatacatctcacagtcttcttcttcttctcgattgcagtagc'
+
+      read = 'tattttggaagttccaattcccttaatacatctcacag'
+      currentQualities = [[30]*len(read)]
 
       id       = 1
       chromo   = 3
@@ -31,15 +46,37 @@ class TestQPalmaPrediction(unittest.TestCase):
 
       tsize = 23470805
 
-      genomicSeq_start  = tsize - 2048
-      genomicSeq_stop   = tsize - 1990
+      genomicSeq_start  = tsize - (2038+10)
+      genomicSeq_stop   = tsize - 1900 + 10
+      print 'Position: ',
+      print genomicSeq_start,genomicSeq_stop 
 
       currentSeqInfo = id,chromo,strand,genomicSeq_start,genomicSeq_stop
 
       example = (currentSeqInfo,read,currentQualities)
-
       self.prediction_set[id] = [example]
 
+      #  xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx                                                                                        xxxxxxxx
+      # 'gctactgcaatcgagaagaagaagaagactgtgagatgtatttaagggaattggaactaagaaaaaaataaaaggactaggcattggaggggacagtgttgagagcaatgtcggagagatgaaataccttccaaaata'
+
+      read = 'gctactgcaatcgagaagaagaagaagactatgaaata'
+      currentQualities = [[40]*30+[22]*8]
+
+      id       = 2
+      chromo   = 3
+      strand   = '+'
+
+      tsize = 23470805
+
+      genomicSeq_start  = tsize - (2038+10)
+      genomicSeq_stop   = tsize - 1900 + 10
+      print 'Position: ',
+      print genomicSeq_start,genomicSeq_stop 
+
+      currentSeqInfo = id,chromo,strand,genomicSeq_start,genomicSeq_stop
+
+      example = (currentSeqInfo,read,currentQualities)
+      self.prediction_set[id] = [example]
 
    
    def testAlignments(self):
@@ -61,24 +98,41 @@ class TestQPalmaPrediction(unittest.TestCase):
             print 'size'
             print len(example)
 
-      qp = QPalma(True)
-      #qp.init_prediction(run,set_name)
-      allPredictions = qp.predict(run,self.prediction_set,param)
-      for elem in allPredictions:
-         dna_array   = elem['dna_array']
-         read_array  = elem['read_array']
+      # fetch the data needed
+      g_dir    = run['dna_flat_files'] #'/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome/'
+      acc_dir  = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/acc'
+      don_dir  = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/don'
 
-         dna   = map(lambda x: alphabet[x],dna_array)
-         read  = map(lambda x: alphabet[x],read_array)
+      g_fmt = 'chr%d.dna.flat'
+      s_fmt = 'contig_%d%s'
 
-         spliceAlign = elem['spliceAlign']
-         estAlign    = elem['estAlign']
+      num_chromo = 6
 
-         line1,line2,line3 = pprint_alignment(spliceAlign, estAlign, dna, read)
-         print line1
-         print line2
-         print line3
+      accessWrapper = DataAccessWrapper(g_dir,acc_dir,don_dir,g_fmt,s_fmt)
+      seqInfo = SeqSpliceInfo(accessWrapper,range(1,num_chromo))
 
+      qp = QPalma(run,seqInfo,True)
+      #qp.init_prediction(run,set_name)
+      allPredictions = qp.predict(self.prediction_set,param)
+
+      for current_prediction in allPredictions:
+         align_str = print_prediction(current_prediction)
+         print align_str
+
+         id = current_prediction['id']
+         seq         = current_prediction['read']
+         dna         = current_prediction['dna']
+         chromo      = current_prediction['chr']
+         strand      = current_prediction['strand']
+         start_pos   = current_prediction['start_pos']
+         predExons   = current_prediction['predExons']
+
+         numExons = int(math.ceil(len(predExons) / 2))
+         
+         print alignment_reconstruct(current_prediction,numExons)
+         #print id,start_pos,predExons
+
+      print 'Problem counter is %d' % qp.problem_ctr 
 
 if __name__ == '__main__':
    suite = unittest.TestLoader().loadTestsFromTestCase(TestQPalmaPrediction)