print 'True neg / false neg : %d %d'%(self.true_neg,self.false_neg)
- def findHighestScoringSpliceSites(self,currentAcc,currentDon):
+ def findHighestScoringSpliceSites(self, currentAcc, currentDon, DNA, max_intron_size, read_size, splice_thresh):
- acc = []
- for idx,score in enumerate(currentAcc):
- if score > -inf:
- acc.append((idx,score))
- if idx>self.read_size:
- break
+ def signum(a):
+ if a>0:
+ return 1
+ elif a<0:
+ return -1
+ else:
+ return 0
+
+ proximal_acc = []
+ for idx in xrange(max_intron_size, max_intron_size+read_size):
+ if currentAcc[idx]>= splice_thresh:
+ proximal_acc.append((idx,currentAcc[idx]))
+
+ proximal_acc.sort(lambda x,y: signum(x[1]-y[1]))
+ proximal_acc=proximal_acc[-2:]
+
+ distal_acc = []
+ for idx in xrange(max_intron_size+read_size, len(currentAcc)):
+ if currentAcc[idx]>= splice_thresh:
+ distal_acc.append((idx, currentAcc[idx], DNA[idx:idx+read_size]))
+
+ #distal_acc.sort(lambda x,y: signum(x[1]-y[1]))
+ #distal_acc=distal_acc[-2:]
- acc.sort(lambda x,y: x[0]-y[0])
- acc=acc[-2:]
-
- don = []
- for idx,score in enumerate(currentDon):
- if score > -inf:
- don.append((idx,score))
- if idx>self.read_size:
- break
- don.sort(lambda x,y: x[0]-y[0])
- don=don[-2:]
+ proximal_don = []
+ for idx in xrange(max_intron_size, max_intron_size+read_size):
+ if currentDon[idx] >= splice_thresh:
+ proximal_don.append((idx, currentDon[idx]))
- return don,acc
+ proximal_don.sort(lambda x,y: signum(x[1]-y[1]))
+ proximal_don=proximal_don[-2:]
+
+ distal_don = []
+ for idx in xrange(max_intron_size, max_intron_size+read_size):
+ if currentDon[idx] >= splice_thresh:
+ distal_don.append((idx, currentDon[idx], DNA[idx-read_size:idx]))
+
+ #distal_don.sort(lambda x,y: signum(x[1]-y[1]))
+ #distal_don=distal_don[-2:]
+
+ return proximal_acc,proximal_don,distal_acc,distal_don
def calcAlternativeAlignments(self,location):
"""
alternative alignments taking into account for example matched
donor/acceptor positions.
"""
- def signum(a):
- if a>0:
- return 1
- elif a<0:
- return -1
- else:
- return 0
run = self.run
+ splice_thresh = 0.01
+ max_intron_size = 2000
id = location['id']
chr = location['chr']
est = unbracket_est(original_est)
effective_len = len(est)
- genomicSeq_start = pos
- genomicSeq_stop = pos+1000#self.read_size+1#+self.intron_size*2+self.read_size*2
+ genomicSeq_start = pos - max_intron_size
+ genomicSeq_stop = pos + max_intron_size + len(est)
start = cpu()
currentDNASeq, currentAcc, currentDon = get_seq_and_scores(chr,strand,genomicSeq_start,genomicSeq_stop,run['dna_flat_files'])
self.get_time += stop-start
dna = currentDNASeq
- alt_don,alt_acc = self.findHighestScoringSpliceSites(currentAcc,currentDon)
+ proximal_don,proximal_acc,distal_don,distal_acc = self.findHighestScoringSpliceSites(currentAcc,currentDon, max_intron_size)
- sort_acc=[ elem for elem in enumerate(currentAcc) ]
- sort_acc.sort(lambda x,y: signum(x[1]-y[1]) )
-
- (max_acc_pos,max_acc_score)=sort_acc[-1]
+ #sort_acc=[ elem for elem in enumerate(currentAcc) ]
+ #sort_acc.sort(lambda x,y: signum(x[1]-y[1]) )
#print alt_don
#print alt_acc