+ fixed some issues with the splice site scores and ugly code fragments
authorFabio <fabio@congo.fml.local>
Mon, 13 Oct 2008 08:46:18 +0000 (10:46 +0200)
committerFabio <fabio@congo.fml.local>
Mon, 13 Oct 2008 08:46:18 +0000 (10:46 +0200)
MANIFEST.in
doc/qpalma-manual.tex
qpalma/Lookup.py
qpalma/SettingsParser.py
qpalma/gridtools.py
qpalma/qpalma_main.py
qpalma/sequence_utils.py
test.conf
tests/test_qpalma.py
tests/test_qpalma_installation.py [new file with mode: 0644]

index d6c12e3..d3efe32 100644 (file)
@@ -1,5 +1,8 @@
 recursive-include ParaParser *.cpp *.h *.i Makefile
 recursive-include DynProg *.cpp *.h *.i Makefile
 recursive-include doc *.tex Makefile
+recursive-include scripts *.py
+recursive-include qpalma *.py
+recursive-include tests *.py
 include test.conf
 
index 78f011e..908dead 100644 (file)
@@ -62,7 +62,7 @@ For training \QP you need one of the following optimization toolkits:
 
 \begin{enumerate}
 \item Install the Pythongrid and the Genefinding tool packages
-\item Update your PYTHONPATH variable to poin to the above packages
+\item Update your PYTHONPATH variable to point to the above packages
 \item Unpack the QPalma tarball via
 \item[$\rightarrow$] tar -xzvf QPalma-1.0.tar.gz
 \item Enter the QPalma-1.0 directory and type:
index da9365c..bc36f96 100644 (file)
@@ -8,8 +8,10 @@
 
 import sys
 import os
+import pdb
+from numpy import inf
 
-from qpalma.sequence_utils import SeqSpliceInfo,get_flat_file_size,DataAccessWrapper
+from qpalma.sequence_utils import SeqSpliceInfo,get_flat_file_size,DataAccessWrapper,reverse_complement
 
 
 class LookupTable:
@@ -23,8 +25,6 @@ class LookupTable:
       accessWrapper = DataAccessWrapper(settings)
       self.seqInfo = SeqSpliceInfo(accessWrapper,settings['allowed_fragments'])
 
-      self.strands = ['+','-']
-   
       # take care that it also works say if we get a chromo_list [1,3,5]
       # those are mapped then to 0,1,2
       self.chromo_list = chromo_list
@@ -38,71 +38,81 @@ class LookupTable:
       self.acceptorScores     = {}
       self.donorScores        = {}
 
-      for strandId in self.strands:
-         self.genomicSequences[strandId] = [0]*(len(self.chromo_list))
-         self.acceptorScores[strandId]   = [0]*(len(self.chromo_list))
-         self.donorScores[strandId]      = [0]*(len(self.chromo_list))
+      self.genomicSequences = [0]*(len(self.chromo_list))
+      self.acceptorScoresPos   = [0]*(len(self.chromo_list))
+      self.donorScoresPos      = [0]*(len(self.chromo_list))
+      self.acceptorScoresNeg   = [0]*(len(self.chromo_list))
+      self.donorScoresNeg      = [0]*(len(self.chromo_list))
 
       for chrId in self.chromo_list:
-         for strandId in self.strands:
-            print (chrId,strandId)
-            self.prefetch_seq_and_scores(chrId,strandId)
+         self.prefetch_seq_and_scores(chrId)
 
  
-   def get_seq_and_scores(self,chromo,strand,start,stop):
+   def get_seq_and_scores(self,chromo,strand,_start,_stop):
       assert chromo in self.chromo_list
-      assert strand in self.strands
+
+      if strand == '-':
+         total_size = self.seqInfo.getFragmentSize(chromo)
+         startP = total_size - _stop
+         stopP  = total_size - _start
+         stop = stopP
+         start = startP
 
       assert start <= stop
 
       chromo_idx = self.chromo_map[chromo]
 
-      start -= 1
-      stop  -= 1
-
-      currentSequence         = self.genomicSequences[strand][chromo_idx][start:stop]
-
-      if strand == '-':
-         start += 2
-         stop  += 2
-
-      currentAcceptorScores   = self.acceptorScores[strand][chromo_idx][start:stop]
-      currentDonorScores      = self.donorScores[strand][chromo_idx][start:stop]
+      currentSequence         = self.genomicSequences[chromo_idx][start:stop]
+      if strand == '+':
+         currentAcceptorScores   = self.acceptorScoresPos[chromo_idx][start:stop]
+         currentDonorScores      = self.donorScoresPos[chromo_idx][start:stop]
+      elif strand == '-':
+         currentAcceptorScores   = self.acceptorScoresNeg[chromo_idx][_start:_stop]
+         currentDonorScores      = self.donorScoresNeg[chromo_idx][_start:_stop]
 
       if strand == '-':
-         currentSequence = currentSequence[::-1]
-         currentAcceptorScores.reverse()
-         currentDonorScores.reverse()
+         currentSequence = reverse_complement(currentSequence)
 
       return currentSequence, currentAcceptorScores, currentDonorScores
 
 
-   def prefetch_seq_and_scores(self,chromo,strand):
+   def prefetch_seq_and_scores(self,chromo):
       """
       This function expects an interval, chromosome and strand information and
       returns then the genomic sequence of this interval and the associated scores.
       """
       assert chromo in self.chromo_list
-      assert strand in self.strands
 
-      chromo_idx = self.chromo_map[chromo]
+      genomicSeq_start  = 0
+      genomicSeq_stop   = self.seqInfo.getFragmentSize(chromo)
 
-      genomicSeq_start  = 1
-      genomicSeq_stop   = self.seqInfo.chromo_sizes[chromo]-1
+      print 'lt total_size %d' % self.seqInfo.getFragmentSize(chromo)
 
-      print 'lt total_size %d' % self.seqInfo.chromo_sizes[chromo]
+      strand = '+'
+      genomicSeq,currentAcc,currentDon = self.seqInfo.get_seq_and_scores(chromo,strand,genomicSeq_start,genomicSeq_stop,False,False)
+      print len(currentAcc),len(currentDon)
+      genomicSeq = genomicSeq.lower()
 
-      if strand == '-':
-         genomicSeq_stop   = self.seqInfo.chromo_sizes[chromo]+1
+      chromo_idx = self.chromo_map[chromo]
+
+      self.genomicSequences[chromo_idx] = genomicSeq
+      self.acceptorScoresPos[chromo_idx]   = currentAcc
+      self.donorScoresPos[chromo_idx]      = currentDon
 
+
+      strand = '-'
       genomicSeq,currentAcc,currentDon = self.seqInfo.get_seq_and_scores(chromo,strand,genomicSeq_start,genomicSeq_stop,False,False)
+      print len(currentAcc),len(currentDon)
       genomicSeq = genomicSeq.lower()
 
-      if strand == '-':
-         genomicSeq = genomicSeq[::-1]
-         currentAcc.reverse()
-         currentDon.reverse()
+      newCurrentAcc = [-inf] 
+      newCurrentAcc.extend(currentAcc[:-1])
+      currentAcc = newCurrentAcc
+      newCurrentDon = [-inf] 
+      newCurrentDon.extend(currentDon[:-1])
+      currentDon = newCurrentDon
+      print len(currentAcc),len(currentDon)
+
+      self.acceptorScoresNeg[chromo_idx]   = currentAcc
+      self.donorScoresNeg[chromo_idx]      = currentDon
 
-      self.genomicSequences[strand][chromo_idx] = genomicSeq
-      self.acceptorScores[strand][chromo_idx]   = currentAcc
-      self.donorScores[strand][chromo_idx]      = currentDon
index 83b0858..b7a88b8 100644 (file)
@@ -51,6 +51,9 @@ def makeSettings(settings):
    settings['training_dir'] = jp(result_dir, 'training')
    settings['alignment_dir'] = jp(result_dir, 'alignment')
 
+   # the global logfile will be created in the result dir
+   settings['global_log_fn'] = jp(result_dir, 'qpalma.log')
+
    for dir_name in ['approximation_dir','dataset_dir', 'preproc_dir', 'postproc_dir',\
      'prediction_dir', 'training_dir', 'alignment_dir']:
       try:
index 54aae6d..3df842c 100644 (file)
@@ -275,7 +275,7 @@ class PostprocessingTask(ClusterTask):
          self.result_files.append(result_fn)
 
          current_job = KybJob(gridtools.PostProcessingTaskStarter,[self.settings,chunk_fn,result_fn])
-         current_job.h_vmem = '15.0G'
+         current_job.h_vmem = '2.0G'
          current_job.express = 'True'
 
          print "job #1: ", current_job.nativeSpecification
index f4b8c89..a28e910 100644 (file)
@@ -493,17 +493,17 @@ class QPalma:
       # we do not need the full dataset anymore
       del dataset
    
-      # Load parameter vector to predict with
-      param = cPickle.load(open(settings['prediction_param_fn']))
-
-      self.predict(prediction_set,param,settings)
+      self.predict(prediction_set,settings)
 
 
-   def predict(self,prediction_set,param,settings):
+   def predict(self,prediction_set,settings):
       """
       This method...
       """
 
+      # Load parameter vector to predict with
+      param = cPickle.load(open(settings['prediction_param_fn']))
+
       # Set the parameters such as limits/penalties for the Plifs
       [h,d,a,mmatrix,qualityPlifs] =\
       set_param_palma(param,self.ARGS.train_with_intronlengthinformation,settings)
index e111f3c..626ecc5 100644 (file)
@@ -83,7 +83,8 @@ def get_flat_file_size(filename):
       print 'Error occurred while trying to obtain file size'
       raise DataAccessionException
 
-   return int(out)
+   # subtract one because of wc -c output
+   return int(out)-1
 
 
 def reverse_complement(seq):
@@ -244,13 +245,8 @@ class DataAccessWrapper:
       self.sscore_fmt         = settings['splice_score_file_fmt'] 
 
 
-   def get_genomic_fragment_fn(self,id,strand):
-      if strand == '+':
-         return os.path.join(self.genomic_dir,self.genomic_fmt%id)
-      elif strand == '-':
-         return os.path.join(self.genomic_dir,(self.genomic_fmt%id)+'.neg')
-      else:
-         assert False
+   def get_genomic_fragment_fn(self,id):
+      return os.path.join(self.genomic_dir,self.genomic_fmt%id)
 
 
    def get_splice_site_scores_fn(self,id,strand):
@@ -265,21 +261,32 @@ class SeqSpliceInfo():
    
    """
    
-   def __init__(self,accessWrapper,id_list):
+   def __init__(self,accessWrapper,fragment_list):
       self.accessWrapper = accessWrapper
       
       self.chromo_sizes = {}
+
+      # take care that it also works say if we get a chromo_list [1,3,5]
+      # those are mapped then to 0,1,2
+      self.fragment_list = fragment_list
+      self.chromo_map = {}
+      for idx,elem in enumerate(fragment_list):
+         self.chromo_map[elem] = idx
    
       print "Fetching fragment sizes..."
-      for chromo in id_list:
-         filename = self.accessWrapper.get_genomic_fragment_fn(chromo,'+')
+      for id in fragment_list:
+         filename = self.accessWrapper.get_genomic_fragment_fn(id)
          total_size = get_flat_file_size(filename)
-         self.chromo_sizes[chromo] = total_size
+         self.chromo_sizes[self.chromo_map[id]] = total_size
 
       print "done"
 
 
-   def getSpliceScores(self,chromo,strand,intervalBegin,intervalEnd,genomicSeq=''):
+   def getFragmentSize(self,id):
+      return self.chromo_sizes[self.chromo_map[id]]
+
+
+   def getSpliceScores(self,id,strand,intervalBegin,intervalEnd,genomicSeq=''):
       """
       Now we want to use interval_query to get the predicted splice scores trained
       on the TAIR sequence and annotation.
@@ -295,24 +302,21 @@ class SeqSpliceInfo():
       pos_size = new_intp()
       intp_assign(pos_size,1)
 
-      total_size = self.chromo_sizes[chromo]
+      total_size = self.getFragmentSize(id)
 
-      acc_fn, don_fn = self.accessWrapper.get_splice_site_scores_fn(chromo,strand)
+      acc_fn, don_fn = self.accessWrapper.get_splice_site_scores_fn(id,strand)
 
       # fetch acceptor scores
-      acc = doIntervalQuery(chromo,strand,intervalBegin,intervalEnd,acc_fn,total_size)
+      acc = doIntervalQuery(id,strand,intervalBegin,intervalEnd,acc_fn,total_size)
 
       # fetch donor scores
-      don = doIntervalQuery(chromo,strand,intervalBegin,intervalEnd,don_fn,total_size)
+      don = doIntervalQuery(id,strand,intervalBegin,intervalEnd,don_fn,total_size)
 
       return acc, don
 
 
    def fetch_sequence(self,chromo,strand,genomicSeq_start,genomicSeq_stop):
-      #chromo_string  = 'chr%d' % chromo
-      #genomicSeq = load_genomic(chromo_string,strand,genomicSeq_start,genomicSeq_stop,self.flat_files,one_based=False)
-      filename = self.accessWrapper.get_genomic_fragment_fn(chromo,strand)
-      #print filename,genomicSeq_start,genomicSeq_stop
+      filename = self.accessWrapper.get_genomic_fragment_fn(chromo)
       genomicSeq = my_load_genomic(filename,'+',genomicSeq_start,genomicSeq_stop,one_based=False)
       genomicSeq = genomicSeq.strip().lower()
 
@@ -326,7 +330,7 @@ class SeqSpliceInfo():
       return genomicSeq
 
 
-   def get_seq_and_scores(self,chromo,strand,genomicSeq_start,genomicSeq_stop,only_seq=False,perform_checks=True):
+   def get_seq_and_scores(self,id,strand,genomicSeq_start,genomicSeq_stop,only_seq=False,perform_checks=True):
       """
       This function expects an interval, chromosome and strand information and
       returns then the genomic sequence of this interval and the associated scores.
@@ -336,26 +340,22 @@ class SeqSpliceInfo():
       # do not have any score predictions
       NO_SCORE_WINDOW_SIZE = 205
 
+      chromo = self.chromo_map[id]
+
       total_size = self.chromo_sizes[chromo]
 
-      #print genomicSeq_start,genomicSeq_stop
+      if strand == '-':
+         s_start  = total_size - genomicSeq_stop
+         s_stop   = total_size - genomicSeq_start
+         genomicSeq_start = s_start
+         genomicSeq_stop  = s_stop
 
       assert genomicSeq_start < genomicSeq_stop, pdb.set_trace()
 
-      if strand == '+':
-         s_start  = genomicSeq_start - 1
-         s_stop   = genomicSeq_stop - 1 
-
-         genomicSeq_start  -= 1
-         genomicSeq_stop   -= 1 
+      genomicSeq = self.fetch_sequence(id,strand,genomicSeq_start,genomicSeq_stop)
 
       if strand == '-':
-         s_start  = total_size - genomicSeq_stop + 1
-         s_stop   = total_size - genomicSeq_start + 1 
-
-      assert genomicSeq_start < genomicSeq_stop
-
-      genomicSeq = self.fetch_sequence(chromo,strand,s_start,s_stop)
+         genomicSeq = reverse_complement(genomicSeq)
 
       if only_seq:
          return genomicSeq
@@ -371,7 +371,7 @@ class SeqSpliceInfo():
       if intervalEnd == total_size-1:
          lookup_offset_end = 0
 
-      currentAcc, currentDon = self.getSpliceScores(chromo,strand,intervalBegin,intervalEnd,genomicSeq)
+      currentAcc, currentDon = self.getSpliceScores(id,strand,intervalBegin,intervalEnd,genomicSeq)
 
       if strand == '-':
          currentAcc = currentAcc[1:]
@@ -414,13 +414,13 @@ class SeqSpliceInfo():
       don_pos = [p for p,e in enumerate(currentDon) if e != -inf and p > 0]
 
       if perform_checks and not ag_tuple_pos == acc_pos:
-         print 'ACC: Chromo/strand: %d/%s' % (chromo,strand)
+         print 'ACC: Chromo/strand: %d/%s' % (id,strand)
          print ag_tuple_pos[:10],ag_tuple_pos[-10:]
          print acc_pos[:10],acc_pos[-10:]
          pdb.set_trace()
 
       if perform_checks and not gt_tuple_pos == don_pos:
-         print 'DON: Chromo/strand: %d/%s' % (chromo,strand)
+         print 'DON: Chromo/strand: %d/%s' % (id,strand)
          print gt_tuple_pos[:10],gt_tuple_pos[-10:]
          print don_pos[:10],don_pos[-10:]
          pdb.set_trace()
index 4c8a65b..39e2b0c 100644 (file)
--- a/test.conf
+++ b/test.conf
@@ -39,12 +39,6 @@ unspliced_reads_fn = /fml/ag-raetsch/home/fabio/tmp/sandbox/map.vm
 num_splits = 5
 
 #
-# Specifiy here the filename of the global logfile QPalma uses to report everything
-#
-
-global_log_fn = /fml/ag-raetsch/home/fabio/tmp/sandbox/first_test/qpalma.log
-
-#
 # The parameter you want to do prediction with:
 #
 
index 350e5a7..5f62f40 100644 (file)
@@ -24,14 +24,17 @@ from qpalma.utils import print_prediction
 
 from qpalma.Run import Run
 
+from qpalma.SettingsParser import parseSettings
 from qpalma.OutputFormat import alignment_reconstruct
-
 from qpalma.sequence_utils import DataAccessWrapper,SeqSpliceInfo,reverse_complement
 
+from qpalma.Lookup import LookupTable
+
+
 jp = os.path.join
 
 
-def createScoresForSequence(full_chromo_seq):
+def createScoresForSequence(full_chromo_seq,reverse_strand=False):
    """
    Given a genomic sequence this function calculates random scores for all
    ocurring splice sites.
@@ -40,6 +43,8 @@ def createScoresForSequence(full_chromo_seq):
    acc_pos = []
    don_pos = []
 
+   total_size = len(full_chromo_seq)
+
    # the first/last 205 pos do not have any predictions
    for pos,elem in enumerate(full_chromo_seq[205:-205]):
       if full_chromo_seq[pos-2:pos] == 'ag':
@@ -60,6 +65,17 @@ def createScoresForSequence(full_chromo_seq):
    for idx in range(len(don_pos)):
       don_scores[idx] = random.uniform(0.1,1.0)
 
+   # recalculate indices and reverse them in order to have positions relative
+   # to positive strand
+   if reverse_strand:
+      acc_pos = [total_size-1-e for e in acc_pos]
+      acc_pos.reverse()
+      acc_scores.reverse()
+
+      don_pos = [total_size-1-e for e in don_pos]
+      don_pos.reverse()
+      don_scores.reverse()
+
    acc_pos = array.array('I',acc_pos)
    acc_scores = array.array('f',acc_scores)
 
@@ -85,7 +101,6 @@ def saveSpliceInfo(acc_pos,acc_scores,don_pos,don_scores,strand):
    don_pos.tofile(open(don_pos_fn, 'wb'))
 
 
-
 def create_mini_chromosome():
    
    chromo_fn   = 'test_data/chromo1.flat'
@@ -94,23 +109,47 @@ def create_mini_chromosome():
    full_chromo_seq = chromo_fh.read()
    full_chromo_seq = full_chromo_seq.strip()
 
+   print full_chromo_seq[:200]
+
    # create data for forward strand
-   acc_pos,acc_scores,don_pos,don_scores = createScoresForSequence(full_chromo_seq)
+   acc_pos,acc_scores,don_pos,don_scores = createScoresForSequence(full_chromo_seq,reverse_strand=False)
+   print acc_pos[:5]
+   print don_pos[:5]
    saveSpliceInfo(acc_pos,acc_scores,don_pos,don_scores,'+')
 
    # create data for reverse strand
    full_chromo_seq_rev = reverse_complement(full_chromo_seq)
-   acc_pos,acc_scores,don_pos,don_scores = createScoresForSequence(full_chromo_seq_rev)
 
-   # positions are always stored relative to positive strand
-   #acc_scores.reverse()
-   #acc_scores.
+   total_size = len(full_chromo_seq_rev)
+
+   print full_chromo_seq_rev[:200]
+   acc_pos,acc_scores,don_pos,don_scores = createScoresForSequence(full_chromo_seq_rev,reverse_strand=True)
+   acc_pos = [total_size-1-e for e in acc_pos]
+   acc_pos.reverse()
+   print acc_pos[:5]
+   don_pos = [total_size-1-e for e in don_pos]
+   don_pos.reverse()
+   print don_pos[:5]
 
-   don_scores.reverse()
+   #
+   # Remember: The positions are always saved one-based.
+   #
 
-   saveSpliceInfo(acc_pos,acc_scores,don_pos,don_scores,'-')
+   #saveSpliceInfo(acc_pos,acc_scores,don_pos,don_scores,'-')
 
+   a = 103
+   b = 255
+   print 'pos: %s'%full_chromo_seq[a:b]
+   print 'neg: %s'%full_chromo_seq_rev[a:b]
 
+   total_size = len(full_chromo_seq)
+   ap = total_size-b
+   bp = total_size-a
+   print 'png: %s'%reverse_complement(full_chromo_seq[ap:bp])
+
+
+def test_rev_comp():
+   get_seq_and_scores(self,chromo,strand,genomicSeq_start,genomicSeq_stop,only_seq=False,perform_checks=True)
 
 class TestQPalmaPrediction(unittest.TestCase):
    """
@@ -214,8 +253,47 @@ class TestQPalmaPrediction(unittest.TestCase):
       example = (currentSeqInfo,read,currentQualities)
       self.prediction_set[id] = [example]
 
-   
+
    def testAlignments(self):
+
+      settings = parseSettings('testcase.conf')
+
+      print self.prediction_set
+      for example_key in self.prediction_set.keys():
+         print 'Current example %d' % example_key
+
+         for example in self.prediction_set[example_key]:
+            print example
+            print 'size'
+            print len(example)
+
+      accessWrapper = DataAccessWrapper(settings)
+      seqInfo = SeqSpliceInfo(accessWrapper,settings['allowed_fragments'])
+
+      qp = QPalma(seqInfo,True)
+      allPredictions = qp.predict(self.prediction_set,settings)
+
+      for current_prediction in allPredictions:
+         align_str = print_prediction(current_prediction)
+         print align_str
+
+         id = current_prediction['id']
+         seq         = current_prediction['read']
+         dna         = current_prediction['dna']
+         chromo      = current_prediction['chr']
+         strand      = current_prediction['strand']
+         start_pos   = current_prediction['start_pos']
+         predExons   = current_prediction['predExons']
+
+         numExons = int(math.ceil(len(predExons) / 2))
+         
+         print alignment_reconstruct(current_prediction,numExons)
+         print id,start_pos,predExons
+
+      print 'Problem counter is %d' % qp.problem_ctr 
+
+
+   def _testAlignments(self):
       run_dir = '/fml/ag-raetsch/home/fabio/tmp/newest_run/alignment/saved_run'
 
       run   = cPickle.load(open(jp(run_dir,'run_obj.pickle')))
@@ -273,7 +351,73 @@ class TestQPalmaPrediction(unittest.TestCase):
       print 'Problem counter is %d' % qp.problem_ctr 
 
 
+def simple_check():
+   # fetch the data needed
+   settings = {}
+
+   #settings['genome_dir']           = '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome'
+   #settings['acceptor_scores_loc']  = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/acc'
+   #settings['donor_scores_loc']     = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/don'
+
+   #settings['genome_file_fmt']      = 'chr%d.dna.flat'
+   #settings['splice_score_file_fmt']= 'contig_%d%s'
+
+   #allowed_fragments = [1]
+   #settings['allowed_fragments'] = allowed_fragments
+
+   settings['genome_dir']           = 'test_data/'
+   settings['acceptor_scores_loc']  = 'test_data/acc'
+   settings['donor_scores_loc']     = 'test_data/don'
+
+   settings['genome_file_fmt']      = 'chromo%d.flat'
+   settings['splice_score_file_fmt']= 'chromo_%d%s'
+
+   allowed_fragments = [1]
+   settings['allowed_fragments'] = allowed_fragments
+
+   accessWrapper = DataAccessWrapper(settings)
+   seqInfo = SeqSpliceInfo(accessWrapper,allowed_fragments)
+
+   b = 0
+   e = 10000
+   seq = seqInfo.get_seq_and_scores(1,'-',b,e,only_seq=True,perform_checks=False)
+
+   total_size = seqInfo.getFragmentSize(1)
+   bp = total_size - e
+   ep = total_size - b
+   seqp = seqInfo.get_seq_and_scores(1,'+',b,e,only_seq=True,perform_checks=False)
+   seqp = reverse_complement(seqp)
+
+   print seq[0:100] == seqp[0:100]
+   print seq[534:1652] == seqp[534:1652]
+   print seq[-100:] == seqp[-100:]
+
+   lt1 = LookupTable(settings)
+   dna,acc,don= lt1.get_seq_and_scores(1,'-',b,e)
+
+   print seq[:100]  == seqp[:100]  == dna[:100]
+   print seq[-100:] == seqp[-100:] == dna[-100:]
+
+   b = 206
+   e = 300
+   dna,acc,don = seqInfo.get_seq_and_scores(1,'-',b,e,only_seq=False,perform_checks=False)
+   
+   lt1 = LookupTable(settings)
+   _dna,_acc,_don= lt1.get_seq_and_scores(1,'-',b,e)
+
+
+   print dna == _dna
+   print acc == _acc
+   print don == _don
+
+   from numpy import inf
+   print [p for p,e in enumerate(acc) if e != -inf]
+   print [p for p,e in enumerate(_acc) if e != -inf]
+
+
+
 if __name__ == '__main__':
-   create_mini_chromosome()
+   #create_mini_chromosome()
+   simple_check()
    #suite = unittest.TestLoader().loadTestsFromTestCase(TestQPalmaPrediction)
    #unittest.TextTestRunner(verbosity=2).run(suite)
diff --git a/tests/test_qpalma_installation.py b/tests/test_qpalma_installation.py
new file mode 100644 (file)
index 0000000..6a3af01
--- /dev/null
@@ -0,0 +1,92 @@
+#!/usr/bin/env python 
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# Written (W) 2008 Fabio De Bona
+# Copyright (C) 2008 Max-Planck-Society
+
+#
+# This file performs a lot of checks in order to verify the installation of
+# QPalma
+#
+
+import os
+import sys
+import unittest
+
+from test_approximation import TestApproximation
+from test_qpalma import TestQPalmaPrediction
+from test_utils import TestSequenceUtils,TestLookupTable
+
+SUCCESS = True
+
+def check_for_module(module_name):
+
+   try:
+      exec("import %s"%module_name)
+   except:
+      return False
+      sys.exc_info()
+
+   return True
+
+
+if __name__ == '__main__':
+   log_fn = 'error.log'
+
+   if os.path.exists(log_fn):
+      os.remove(log_fn)
+
+   out_fh = open(log_fn,'w+')
+
+   print '\nChecking if your installation was successful...'
+
+   print '\nChecking for needed modules...\n'
+
+
+   module_list = ['numpy','DRMAA','Genefinding','pythongrid','QPalmaDP','qpalma']
+
+   for mod in module_list:
+      status = check_for_module(mod)
+      if status:
+         mes = "Successful!"
+      else:
+         mes = "Failed!"
+         SUCCESS = False
+
+      line = 'Status of module %s:\t%s'%(mod,mes)
+      print line
+      out_fh.write(line+'\n')
+
+   print '\nChecking for optional modules...\n'
+
+   module_list = ['pycplex', 'cvxopt', 'pymosek']
+
+   for mod in module_list:
+      line = 'Status of module %s:\t%s'%(mod,str(check_for_module(mod)))
+      print line
+      out_fh.write(line+'\n')
+
+   # after checking the modules we run some simple testcases on QPalma.
+   data_suite = unittest.TestLoader().loadTestsFromTestCase(TestLookupTable)
+   approximation_suite = unittest.TestLoader().loadTestsFromTestCase(TestApproximation)
+   prediction_suite = unittest.TestLoader().loadTestsFromTestCase(TestQPalmaPrediction)
+
+   all_suites = unittest.TestSuite([data_suite, approximation_suite, prediction_suite])
+   unittest.TextTestRunner(verbosity=2).run(all_suites)
+
+   if SUCCESS:
+      print '\n\n--- All checks where successful!! ---\n\n'
+   else:
+      print '\n\n--- Send the file error.log to qpalma@tuebingen.mpg.de ---\n\n'
+      env = os.environ
+      lines = ''
+      for var in ['PATH','LD_LIBRARY_PATH','PYTHONPATH']:
+         lines += '%s=%s\n'%(var,env[var])
+      out_fh.write(lines)
+
+   out_fh.close()