from numpy.matlib import mat,zeros,ones,inf
from numpy import inf,mean
-#from qpalma.parsers import *
-
from ParaParser import ParaParser,IN_VECTOR
from qpalma.Lookup import LookupTable
g_fmt = 'chr%d.dna.flat'
s_fmt = 'contig_%d%s'
- #self.chromo_range = range(1,6)
- self.chromo_range = range(1,2)
+ self.chromo_range = range(1,6)
+ #self.chromo_range = range(1,2)
num_chromo = 2
accessWrapper = DataAccessWrapper(g_dir,acc_dir,don_dir,g_fmt,s_fmt)
self.qualityPlifs = qualityPlifs
self.read_size = 38
+ self.prb_offset = 64
+ #self.prb_offset = 50
# parameters of the heuristics to decide whether the read is spliced
#self.splice_thresh = 0.005
genomicSeq_stop = pos+effective_len
start = cpu()
- currentDNASeq_, currentAcc_, currentDon_ = self.seqInfo.get_seq_and_scores(chr,strand,genomicSeq_start,genomicSeq_stop,False)
- currentDNASeq, currentAcc, currentDon = self.lt1.get_seq_and_scores(chr,strand,genomicSeq_start,genomicSeq_stop,False)
+ #currentDNASeq_, currentAcc_, currentDon_ = self.seqInfo.get_seq_and_scores(chr,strand,genomicSeq_start,genomicSeq_stop,False)
+ currentDNASeq, currentAcc, currentDon = self.lt1.get_seq_and_scores(chr,strand,genomicSeq_start,genomicSeq_stop)
- assert currentDNASeq_ == currentDNASeq
- assert currentAcc_ == currentAcc_
- assert currentDon_ == currentDon_
+ # just performed some tests to check LookupTable results
+ #assert currentDNASeq_ == currentDNASeq
+ #assert currentAcc_ == currentAcc_
+ #assert currentDon_ == currentDon_
stop = cpu()
self.get_time += stop-start
exons[1,0] = effective_len
est = unb_seq
original_est = seq
- quality = map(lambda x:ord(x)-64,prb)
+ quality = map(lambda x:ord(x)-self.prb_offset,prb)
currentVMatchAlignment = dna, exons, est, original_est, quality,\
currentAcc, currentDon
#pdb.set_trace()
- alternativeAlignmentScores = self.calcAlternativeAlignments(location)
- #try:
- # alternativeAlignmentScores = self.calcAlternativeAlignments(location)
- #except:
- # alternativeAlignmentScores = []
+ #alternativeAlignmentScores = self.calcAlternativeAlignments(location)
+ try:
+ alternativeAlignmentScores = self.calcAlternativeAlignments(location)
+ except:
+ alternativeAlignmentScores = []
if alternativeAlignmentScores == []:
# no alignment necessary
maxAlternativeAlignmentScore = max(alternativeAlignmentScores)
# compute alignment for vmatch unspliced read
vMatchScore = self.calcAlignmentScore(currentVMatchAlignment)
+ #print 'vMatchScore/alternativeScore: %f %f %s' % (vMatchScore,maxAlternativeAlignmentScore,strand)
+
start = cpu()
- #print 'vMatchScore/alternativeScore: %f %f ' % (vMatchScore,maxAlternativeAlignmentScore)
#print 'all candidates %s' % str(alternativeAlignmentScores)
new_id = id - 1000000300000
#distal_acc.sort(lambda x,y: signum(x[1]-y[1]))
#distal_acc=distal_acc[-2:]
-
proximal_don = []
for idx in xrange(max_intron_size+read_size/2, max_intron_size+read_size):
if currentDon[idx] >= splice_thresh:
strand = location['strand']
original_est = location['seq']
quality = location['prb']
- quality = map(lambda x:ord(x)-64,quality)
+ quality = map(lambda x:ord(x)-self.prb_offset,quality)
original_est = original_est.lower()
est = unbracket_seq(original_est)
start = cpu()
#currentDNASeq, currentAcc, currentDon = get_seq_and_scores(chr, strand, genomicSeq_start, genomicSeq_stop, run['dna_flat_files'])
- currentDNASeq, currentAcc, currentDon = self.lt1.get_seq_and_scores(chr, strand, genomicSeq_start, genomicSeq_stop, run['dna_flat_files'])
+ currentDNASeq, currentAcc, currentDon = self.lt1.get_seq_and_scores(chr, strand, genomicSeq_start, genomicSeq_stop)
stop = cpu()
self.get_time += stop-start
dna = currentDNASeq
-
+
proximal_acc,proximal_don,distal_acc,distal_don = self.findHighestScoringSpliceSites(currentAcc, currentDon, dna, self.max_intron_size, len(est), self.splice_thresh)
alternativeScores = []
+
+ #pdb.set_trace()
# inlined
h = self.h
if acc_score>=self.splice_stop_thresh:
break
+ #pdb.set_trace()
+
_stop = cpu()
self.alternativeScoresTime += _stop-_start