sscore_filename = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/acc/contig_%d%s'
acc = doIntervalQuery(chromo,strand,intervalBegin,intervalEnd,sscore_filename,total_size)
- #ag_tuple_pos = [p for p,e in enumerate(genomicSeq) if p>1 and p<len(genomicSeq) and genomicSeq[p-1]=='a' and genomicSeq[p]=='g'][:100]
- #acc_pos = [p for p,e in enumerate(acc) if e != -inf and p > 1][:100]
-
# fetch donor scores
sscore_filename = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/don/contig_%d%s'
don = doIntervalQuery(chromo,strand,intervalBegin,intervalEnd,sscore_filename,total_size)
- #gt_tuple_pos = [p for p,e in enumerate(genomicSeq) if p>0 and p<len(genomicSeq)-1 and e=='g' and (genomicSeq[p+1]=='t' or genomicSeq[p+1]=='c')]
- #don_pos = [p for p,e in enumerate(don) if e != -inf and p > 1][:100]
- #pdb.set_trace()
-
return acc, don
assert genomicSeq_start < genomicSeq_stop
- genomicSeq = self.fetch_sequence(chromo,'+',genomicSeq_start,genomicSeq_stop)
+ if strand == '-':
+ genomicSeq = self.fetch_sequence(chromo+7,'+',genomicSeq_start,genomicSeq_stop)
+ else:
+ genomicSeq = self.fetch_sequence(chromo,'+',genomicSeq_start,genomicSeq_stop)
if only_seq:
return genomicSeq
total_size = self.chromo_sizes[chromo]
# shorten the intervalEnd to the maximal size of the genomic sequence if it would exceed this size
- lookup_offset = 0
- intervalBegin = max(genomicSeq_start-lookup_offset,0)
- intervalEnd = min(genomicSeq_stop+lookup_offset,total_size-1)
+ lookup_offset_begin = 10
+ lookup_offset_end = 10
+ intervalBegin = max(genomicSeq_start-lookup_offset_begin,0)
+ if intervalBegin == 0:
+ lookup_offset_begin = 0
+
+ intervalEnd = min(genomicSeq_stop+lookup_offset_end,total_size-1)
+ if intervalEnd == total_size-1:
+ lookup_offset_end = 0
+
+ currentAcc, currentDon = self.getSpliceScores(chromo,strand,intervalBegin,intervalEnd,genomicSeq)
+
+ original_acc = list(currentAcc)
- if chromo > 7:
- print intervalBegin,intervalEnd,total_size
- currentAcc, currentDon = self.getSpliceScores(chromo-7,'-',intervalBegin,intervalEnd,total_size,genomicSeq)
+ if strand == '-':
currentDon = [-inf]+currentDon[:-1]
else:
- currentAcc, currentDon = self.getSpliceScores(chromo,strand,intervalBegin,intervalEnd,total_size,genomicSeq)
currentAcc = currentAcc[2:]
currentDon = currentDon[1:]
- length = len(genomicSeq)
- currentAcc = currentAcc+[-inf]*(length-len(currentAcc))
- currentDon = currentDon+[-inf]*(length-len(currentDon))
-
- #acc_pos = [p for p,e in enumerate(currentAcc) if e != -inf and p > 1]
- #don_pos = [p for p,e in enumerate(currentDon) if e != -inf and p > 0]
+ # remove the offset positions
+ currentAcc = currentAcc[lookup_offset_begin:]
+ currentDon = currentDon[lookup_offset_begin:]
+
+ currentAcc = currentAcc[:len(genomicSeq)]
+ currentDon = currentDon[:len(genomicSeq)]
- gt_tuple_pos = [p for p,e in enumerate(genomicSeq) if p>0 and p<len(genomicSeq)-1 and e=='g' and (genomicSeq[p+1]=='t' or genomicSeq[p+1]=='c')]
- ag_tuple_pos = [p for p,e in enumerate(genomicSeq) if p>1 and p<len(genomicSeq) and genomicSeq[p-1]=='a' and genomicSeq[p]=='g']
+ #length = len(genomicSeq)
+ #currentAcc = currentAcc+[-inf]*(length-len(currentAcc))
+ #currentDon = currentDon+[-inf]*(length-len(currentDon))
+ currentDon[-1] = -inf
+
+ gt_tuple_pos = [p for p,e in enumerate(genomicSeq) if (p>0 and p<len(genomicSeq)-1 and e=='g' ) and (genomicSeq[p+1]=='t' or genomicSeq[p+1]=='c')]
+ ag_tuple_pos = [p for p,e in enumerate(genomicSeq) if p>1 and genomicSeq[p-1]=='a' and genomicSeq[p]=='g']
#pdb.set_trace()
#return genomicSeq, currentAcc, currentDon
if pos+intervalBegin > total_size - NO_SCORE_WINDOW_SIZE and currentDon[pos] == -inf:
currentDon[pos] = 1e-6
- assert ag_tuple_pos == [p for p,e in enumerate(currentAcc) if e != -inf and p > 1], pdb.set_trace()
- assert gt_tuple_pos == [p for p,e in enumerate(currentDon) if e != -inf and p > 0], pdb.set_trace()
+ acc_pos = [p for p,e in enumerate(currentAcc) if e != -inf and p > 1]
+ don_pos = [p for p,e in enumerate(currentDon) if e != -inf and p > 0]
+
+ if not ag_tuple_pos == acc_pos:
+ print 'ACC: Chromo/strand: %d/%s' % (chromo,strand)
+ print ag_tuple_pos[:10],ag_tuple_pos[-10:]
+ print acc_pos[:10],acc_pos[-10:]
+ #pdb.set_trace()
+
+ if not gt_tuple_pos == don_pos:
+ print 'DON: Chromo/strand: %d/%s' % (chromo,strand)
+ print gt_tuple_pos[:10],gt_tuple_pos[-10:]
+ print don_pos[:10],don_pos[-10:]
+ #pdb.set_trace()
+
+ #assert ag_tuple_pos == acc_pos, pdb.set_trace()
+ #assert gt_tuple_pos == don_pos, pdb.set_trace()
assert len(genomicSeq) == len(currentAcc) == len(currentDon), pdb.set_trace()
return genomicSeq, currentAcc, currentDon