from Genefinding import *
from genome_utils import load_genomic
-extended_alphabet = ['-','a','c','g','t','n','[',']']
-alphabet = ['-','a','c','g','t','n']
+extended_alphabet = ['-','a','c','g','t','n','[',']']
+alphabet = ['-','a','c','g','t','n']
class DataAccessionException:
return "".join(new_seq).lower()
+def reconstruct_dna_seq(seq):
+ """
+
+ """
+
+ new_seq = ''
+ e = 0
+
+ while True:
+ if e >= len(seq):
+ break
+
+ if seq[e] == '[':
+ new_seq += seq[e+1]
+ e += 4
+ else:
+ new_seq += seq[e]
+ e += 1
+
+ return "".join(new_seq).lower()
+
+
+
def create_bracket_seq(dna_seq,read_seq):
"""
This function takes a dna sequence and a read sequence and returns the
def my_load_genomic(fname, strand, start, stop, one_based=True):
- if (type(start)==numpy.ndarray) or (type(stop)==numpy.ndarray):
- assert(len(start) == len(stop))
- assert((start[1:]-stop[:-1]>0).all())
- if strand == '+':
- idx = xrange(len(start))
- else:
- idx = xrange(len(start)-1,-1,-1)
-
- seq = ''.join([load_genomic(chromosome, strand, start[ix], stop[ix], genome)\
+
+ if (type(start)==numpy.ndarray) or (type(stop)==numpy.ndarray):
+ assert(len(start) == len(stop))
+ assert((start[1:]-stop[:-1]>0).all())
+ if strand == '+':
+ idx = xrange(len(start))
+ else:
+ idx = xrange(len(start)-1,-1,-1)
+
+ seq = ''.join([load_genomic(chromosome, strand, start[ix], stop[ix], genome)\
for ix in idx])
- return seq
-
- f=file(fname)
- if one_based:
- f.seek(start-1)
- str=f.read(stop-start+1)
- else:
- f.seek(start)
- str=f.read(stop-start)
-
- if strand=='-':
- return reverse_complement(str)
- elif strand=='+':
- return str
- else:
- print 'strand must be + or -'
- raise KeyError
+ return seq
+
+ try:
+ f=file(fname)
+ if one_based:
+ f.seek(start-1)
+ str=f.read(stop-start+1)
+ else:
+ f.seek(start)
+ str=f.read(stop-start)
+
+ if strand=='-':
+ return reverse_complement(str)
+ elif strand=='+':
+ return str
+ else:
+ print 'strand must be + or -'
+ raise KeyError
+
+ except:
+ print fname,strand,start,stop
class DataAccessWrapper:
extended_alphabet = ['-','a','c','g','t','n','[',']']
alphabet = ['-','a','c','g','t','n']
+alignment_alphabet = ['-','a','c','g','t','n','z']
+
+
+def print_prediction(example):
+ dna_array = example['dna_array']
+ read_array = example['read_array']
+
+ dna = map(lambda x: alignment_alphabet[x],dna_array)
+ read = map(lambda x: alignment_alphabet[x],read_array)
+
+ spliceAlign = example['spliceAlign']
+ estAlign = example['estAlign']
+
+ line1,line2,line3 = pprint_alignment(spliceAlign, estAlign, dna, read)
+
+ result = '%s\n%s\n%s' %(line1,line2,line3)
+ return result
+
def calc_info(acc,don,exons,qualities):
min_score = 100
max_score = -100