+ minor mods in sequence_utils
authorfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Wed, 6 Aug 2008 16:12:19 +0000 (16:12 +0000)
committerfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Wed, 6 Aug 2008 16:12:19 +0000 (16:12 +0000)
+ addded alignment viz to Utils

git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@10345 e1793c9e-67f9-0310-80fc-b846ff1f7b36

qpalma/sequence_utils.py
scripts/Utils.py

index 7f5e580..4d0cd47 100644 (file)
@@ -20,8 +20,8 @@ from numpy.matlib import inf
 from Genefinding import *
 from genome_utils import load_genomic
 
-extended_alphabet = ['-','a','c','g','t','n','[',']']
-alphabet          = ['-','a','c','g','t','n']
+extended_alphabet    = ['-','a','c','g','t','n','[',']']
+alphabet             = ['-','a','c','g','t','n']
 
 
 class DataAccessionException:
@@ -112,6 +112,29 @@ def unbracket_seq(seq):
    return "".join(new_seq).lower()
 
 
+def reconstruct_dna_seq(seq):
+   """
+
+   """
+
+   new_seq = ''
+   e = 0
+
+   while True:
+      if e >= len(seq):
+         break
+
+      if seq[e] == '[':
+         new_seq += seq[e+1]
+         e += 4
+      else:
+         new_seq += seq[e]
+         e += 1
+
+   return "".join(new_seq).lower()
+
+
+
 def create_bracket_seq(dna_seq,read_seq):
    """
    This function takes a dna sequence and a read sequence and returns the
@@ -127,33 +150,38 @@ def create_bracket_seq(dna_seq,read_seq):
 
 
 def my_load_genomic(fname, strand, start, stop, one_based=True):
-    if (type(start)==numpy.ndarray) or (type(stop)==numpy.ndarray):
-        assert(len(start) == len(stop))
-        assert((start[1:]-stop[:-1]>0).all())
-        if strand == '+':
-            idx = xrange(len(start))
-        else:
-            idx = xrange(len(start)-1,-1,-1)
-
-        seq = ''.join([load_genomic(chromosome, strand, start[ix], stop[ix], genome)\
+
+   if (type(start)==numpy.ndarray) or (type(stop)==numpy.ndarray):
+      assert(len(start) == len(stop))
+      assert((start[1:]-stop[:-1]>0).all())
+      if strand == '+':
+         idx = xrange(len(start))
+      else:
+         idx = xrange(len(start)-1,-1,-1)
+
+      seq = ''.join([load_genomic(chromosome, strand, start[ix], stop[ix], genome)\
                        for ix in idx])
-        return seq
-
-    f=file(fname)
-    if one_based:
-        f.seek(start-1)
-        str=f.read(stop-start+1)
-    else:
-        f.seek(start)
-        str=f.read(stop-start)
-
-    if strand=='-':
-        return reverse_complement(str)
-    elif strand=='+':
-        return str
-    else:
-        print 'strand must be + or -'
-        raise KeyError
+      return seq
+
+   try:
+      f=file(fname)
+      if one_based:
+         f.seek(start-1)
+         str=f.read(stop-start+1)
+      else:
+         f.seek(start)
+         str=f.read(stop-start)
+
+      if strand=='-':
+         return reverse_complement(str)
+      elif strand=='+':
+         return str
+      else:
+         print 'strand must be + or -'
+         raise KeyError
+
+   except:
+      print fname,strand,start,stop
 
 
 class DataAccessWrapper:
index 7a54ed5..db685d1 100644 (file)
@@ -14,6 +14,24 @@ from palma.output_formating import print_results
 extended_alphabet = ['-','a','c','g','t','n','[',']']
 alphabet          = ['-','a','c','g','t','n']
 
+alignment_alphabet   = ['-','a','c','g','t','n','z']
+
+
+def print_prediction(example):
+   dna_array   = example['dna_array']
+   read_array  = example['read_array']
+
+   dna   = map(lambda x: alignment_alphabet[x],dna_array)
+   read  = map(lambda x: alignment_alphabet[x],read_array)
+
+   spliceAlign = example['spliceAlign']
+   estAlign    = example['estAlign']
+
+   line1,line2,line3 = pprint_alignment(spliceAlign, estAlign, dna, read)
+   
+   result = '%s\n%s\n%s' %(line1,line2,line3)
+   return result
+
 def calc_info(acc,don,exons,qualities):
    min_score = 100
    max_score = -100