+ minor fixes
authorfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Mon, 4 Aug 2008 12:23:17 +0000 (12:23 +0000)
committerfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Mon, 4 Aug 2008 12:23:17 +0000 (12:23 +0000)
git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@10313 e1793c9e-67f9-0310-80fc-b846ff1f7b36

qpalma/sequence_utils.py

index 930811f..7cf5e4c 100644 (file)
@@ -246,6 +246,7 @@ class SeqSpliceInfo():
       #chromo_string  = 'chr%d' % chromo
       #genomicSeq = load_genomic(chromo_string,strand,genomicSeq_start,genomicSeq_stop,self.flat_files,one_based=False)
       filename = self.accessWrapper.get_genomic_fragment_fn(chromo,strand)
+      print filename,genomicSeq_start,genomicSeq_stop
       genomicSeq = my_load_genomic(filename,'+',genomicSeq_start,genomicSeq_stop,one_based=False)
       genomicSeq = genomicSeq.strip().lower()
 
@@ -258,6 +259,7 @@ class SeqSpliceInfo():
 
       return genomicSeq
 
+
    def get_seq_and_scores(self,chromo,strand,genomicSeq_start,genomicSeq_stop,only_seq=False):
       """
       This function expects an interval, chromosome and strand information and
@@ -268,15 +270,30 @@ class SeqSpliceInfo():
       # do not have any score predictions
       NO_SCORE_WINDOW_SIZE = 205
 
+      total_size = self.chromo_sizes[chromo]
+
+      #print genomicSeq_start,genomicSeq_stop
+
+      assert genomicSeq_start < genomicSeq_stop
+
+      if strand == '+':
+         s_start  = genomicSeq_start - 1
+         s_stop   = genomicSeq_stop - 1 
+
+         genomicSeq_start  -= 1
+         genomicSeq_stop   -= 1 
+
+      if strand == '-':
+         s_start  = total_size - genomicSeq_stop + 1
+         s_stop   = total_size - genomicSeq_start + 1 
+
       assert genomicSeq_start < genomicSeq_stop
 
-      genomicSeq = self.fetch_sequence(chromo,strand,genomicSeq_start,genomicSeq_stop)
+      genomicSeq = self.fetch_sequence(chromo,strand,s_start,s_stop)
 
       if only_seq:
          return genomicSeq
 
-      total_size = self.chromo_sizes[chromo]
-
       # shorten the intervalEnd to the maximal size of the genomic sequence if it would exceed this size
       lookup_offset_begin  = 10
       lookup_offset_end    = 10
@@ -290,10 +307,10 @@ class SeqSpliceInfo():
 
       currentAcc, currentDon = self.getSpliceScores(chromo,strand,intervalBegin,intervalEnd,genomicSeq)
 
-      original_acc = list(currentAcc)
-
       if strand == '-':
-         currentDon = [-inf]+currentDon[:-1]
+         currentAcc = currentAcc[1:]
+         #currentDon = currentDon[1:]
+         #currentDon = [-inf]+currentDon[:-1]
       else:
          currentAcc = currentAcc[2:]
          currentDon = currentDon[1:]
@@ -333,12 +350,18 @@ class SeqSpliceInfo():
       acc_pos = [p for p,e in enumerate(currentAcc) if e != -inf and p > 1]
       don_pos = [p for p,e in enumerate(currentDon) if e != -inf and p > 0]
 
+      print 'ACC: Chromo/strand: %d/%s' % (chromo,strand)
+      print ag_tuple_pos[:10],ag_tuple_pos[-10:]
+      print acc_pos[:10],acc_pos[-10:]
       if not ag_tuple_pos == acc_pos:
          print 'ACC: Chromo/strand: %d/%s' % (chromo,strand)
          print ag_tuple_pos[:10],ag_tuple_pos[-10:]
          print acc_pos[:10],acc_pos[-10:]
          pdb.set_trace()
 
+      print 'DON: Chromo/strand: %d/%s' % (chromo,strand)
+      print gt_tuple_pos[:10],gt_tuple_pos[-10:]
+      print don_pos[:10],don_pos[-10:]
       if not gt_tuple_pos == don_pos:
          print 'DON: Chromo/strand: %d/%s' % (chromo,strand)
          print gt_tuple_pos[:10],gt_tuple_pos[-10:]