author fabio Wed, 6 Aug 2008 16:12:19 +0000 (16:12 +0000) committer fabio Wed, 6 Aug 2008 16:12:19 +0000 (16:12 +0000)
+ addded alignment viz to Utils

git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@10345 e1793c9e-67f9-0310-80fc-b846ff1f7b36

 qpalma/sequence_utils.py patch | blob | history scripts/Utils.py patch | blob | history

index 7f5e580..4d0cd47 100644 (file)
@@ -20,8 +20,8 @@ from numpy.matlib import inf
from Genefinding import *

-extended_alphabet = ['-','a','c','g','t','n','[',']']
-alphabet          = ['-','a','c','g','t','n']
+extended_alphabet    = ['-','a','c','g','t','n','[',']']
+alphabet             = ['-','a','c','g','t','n']

class DataAccessionException:
@@ -112,6 +112,29 @@ def unbracket_seq(seq):
return "".join(new_seq).lower()

+def reconstruct_dna_seq(seq):
+   """
+
+   """
+
+   new_seq = ''
+   e = 0
+
+   while True:
+      if e >= len(seq):
+         break
+
+      if seq[e] == '[':
+         new_seq += seq[e+1]
+         e += 4
+      else:
+         new_seq += seq[e]
+         e += 1
+
+   return "".join(new_seq).lower()
+
+
+
"""
This function takes a dna sequence and a read sequence and returns the
@@ -127,33 +150,38 @@ def create_bracket_seq(dna_seq,read_seq):

def my_load_genomic(fname, strand, start, stop, one_based=True):
-    if (type(start)==numpy.ndarray) or (type(stop)==numpy.ndarray):
-        assert(len(start) == len(stop))
-        assert((start[1:]-stop[:-1]>0).all())
-        if strand == '+':
-            idx = xrange(len(start))
-        else:
-            idx = xrange(len(start)-1,-1,-1)
-
-        seq = ''.join([load_genomic(chromosome, strand, start[ix], stop[ix], genome)\
+
+   if (type(start)==numpy.ndarray) or (type(stop)==numpy.ndarray):
+      assert(len(start) == len(stop))
+      assert((start[1:]-stop[:-1]>0).all())
+      if strand == '+':
+         idx = xrange(len(start))
+      else:
+         idx = xrange(len(start)-1,-1,-1)
+
+      seq = ''.join([load_genomic(chromosome, strand, start[ix], stop[ix], genome)\
for ix in idx])
-        return seq
-
-    f=file(fname)
-    if one_based:
-        f.seek(start-1)
-    else:
-        f.seek(start)
-
-    if strand=='-':
-        return reverse_complement(str)
-    elif strand=='+':
-        return str
-    else:
-        print 'strand must be + or -'
-        raise KeyError
+      return seq
+
+   try:
+      f=file(fname)
+      if one_based:
+         f.seek(start-1)
+      else:
+         f.seek(start)
+
+      if strand=='-':
+         return reverse_complement(str)
+      elif strand=='+':
+         return str
+      else:
+         print 'strand must be + or -'
+         raise KeyError
+
+   except:
+      print fname,strand,start,stop

class DataAccessWrapper:
index 7a54ed5..db685d1 100644 (file)
@@ -14,6 +14,24 @@ from palma.output_formating import print_results
extended_alphabet = ['-','a','c','g','t','n','[',']']
alphabet          = ['-','a','c','g','t','n']

+alignment_alphabet   = ['-','a','c','g','t','n','z']
+
+
+def print_prediction(example):
+   dna_array   = example['dna_array']
+
+   dna   = map(lambda x: alignment_alphabet[x],dna_array)