from numpy.matlib import mat,zeros,ones,inf
from numpy import inf,mean
-from qpalma.parsers import *
+#from qpalma.parsers import *
-from ParaParser import *
+from ParaParser import ParaParser,IN_VECTOR
from qpalma.Lookup import LookupTable
+from qpalma.sequence_utils import SeqSpliceInfo,DataAccessWrapper,reverse_complement,unbracket_seq
-from qpalma.sequence_utils import reverse_complement,unbracket_seq
+
+def cpu():
+ return (resource.getrusage(resource.RUSAGE_SELF).ru_utime+\
+ resource.getrusage(resource.RUSAGE_SELF).ru_stime)
class BracketWrapper:
- #fields = ['id', 'chr', 'pos', 'strand', 'mismatches', 'length',\
- #'offset', 'seq', 'prb', 'cal_prb', 'chastity']
fields = ['id', 'chr', 'pos', 'strand', 'seq', 'prb']
def __init__(self,filename):
self.run = run
start = cpu()
- # old version
- #self.all_remapped_reads = parse_map_vm_heuristic(data_fname)
self.all_remapped_reads = BracketWrapper(data_fname)
stop = cpu()
g_fmt = 'chr%d.dna.flat'
s_fmt = 'contig_%d%s'
- self.chromo_range = range(1,6)
+ #self.chromo_range = range(1,6)
+ self.chromo_range = range(1,2)
+
+ num_chromo = 2
+ accessWrapper = DataAccessWrapper(g_dir,acc_dir,don_dir,g_fmt,s_fmt)
+ self.seqInfo = SeqSpliceInfo(accessWrapper,range(1,num_chromo))
start = cpu()
self.lt1 = LookupTable(g_dir,acc_dir,don_dir,g_fmt,s_fmt,self.chromo_range)
"""
run = self.run
-
ctr = 0
unspliced_ctr = 0
spliced_ctr = 0
chr = location['chr']
pos = location['pos']
strand = location['strand']
- #mismatch = location['mismatches']
- #length = location['length']
- #off = location['offset']
seq = location['seq']
prb = location['prb']
- #cal_prb = location['cal_prb']
- #chastity = location['chastity']
id = int(id)
if strand == '-':
#pos = self.lt1.seqInfo.chromo_sizes[chr]-pos-self.read_size
unb_seq = reverse_complement(unb_seq)
+ seq = reverse_complement(seq)
effective_len = len(unb_seq)
genomicSeq_stop = pos+effective_len
start = cpu()
- #currentDNASeq, currentAcc, currentDon = get_seq_and_scores(chr,strand,genomicSeq_start,genomicSeq_stop,run['dna_flat_files'])
- currentDNASeq, currentAcc, currentDon = self.lt1.get_seq_and_scores(chr,strand,genomicSeq_start,genomicSeq_stop,run['dna_flat_files'])
+ currentDNASeq_, currentAcc_, currentDon_ = self.seqInfo.get_seq_and_scores(chr,strand,genomicSeq_start,genomicSeq_stop,False)
+ currentDNASeq, currentAcc, currentDon = self.lt1.get_seq_and_scores(chr,strand,genomicSeq_start,genomicSeq_stop,False)
+
+ assert currentDNASeq_ == currentDNASeq
+ assert currentAcc_ == currentAcc_
+ assert currentDon_ == currentDon_
stop = cpu()
self.get_time += stop-start
original_est = seq
quality = map(lambda x:ord(x)-64,prb)
- #pdb.set_trace()
-
currentVMatchAlignment = dna, exons, est, original_est, quality,\
currentAcc, currentDon
- try:
- alternativeAlignmentScores = self.calcAlternativeAlignments(location)
- except:
- alternativeAlignmentScores = []
+ #pdb.set_trace()
+
+ alternativeAlignmentScores = self.calcAlternativeAlignments(location)
+ #try:
+ # alternativeAlignmentScores = self.calcAlternativeAlignments(location)
+ #except:
+ # alternativeAlignmentScores = []
if alternativeAlignmentScores == []:
return proximal_acc,proximal_don,distal_acc,distal_don
+
def calcAlternativeAlignments(self,location):
"""
Given an alignment proposed by Vmatch this function calculates possible
original_est = location['seq']
quality = location['prb']
quality = map(lambda x:ord(x)-64,quality)
- #cal_prb = location['cal_prb']
original_est = original_est.lower()
est = unbracket_seq(original_est)
strand_map = {'D':'+', 'P':'-'}
strand = strand_map[strand]
+ if strand == '-':
+ est = reverse_complement(est)
+ original_est = reverse_complement(original_est)
+
start = cpu()
#currentDNASeq, currentAcc, currentDon = get_seq_and_scores(chr, strand, genomicSeq_start, genomicSeq_stop, run['dna_flat_files'])
currentDNASeq, currentAcc, currentDon = self.lt1.get_seq_and_scores(chr, strand, genomicSeq_start, genomicSeq_stop, run['dna_flat_files'])
self.calcAlignmentScoreTime += stop-start
return score
-
-
-def cpu():
- return (resource.getrusage(resource.RUSAGE_SELF).ru_utime+\
- resource.getrusage(resource.RUSAGE_SELF).ru_stime)
-
-
-if __name__ == '__main__':
- if len(sys.argv) != 6:
- print 'Usage: ./%s data param run spliced.results unspliced.results' % (sys.argv[0])
- sys.exit(1)
-
- data_fname = sys.argv[1]
- param_fname = sys.argv[2]
- run_fname = sys.argv[3]
-
- result_spliced_fname = sys.argv[4]
- result_unspliced_fname = sys.argv[5]
-
- jp = os.path.join
-
- ph1 = PipelineHeuristic(run_fname,data_fname,param_fname,result_spliced_fname,result_unspliced_fname)
-
- start = cpu()
- ph1.filter()
- stop = cpu()
- print 'total time elapsed: %f' % (stop-start)
-
- print 'time spend for get seq: %f' % ph1.get_time
- print 'time spend for calcAlignmentScoreTime: %f' % ph1.calcAlignmentScoreTime
- print 'time spend for alternativeScoresTime: %f' % ph1.alternativeScoresTime
- print 'time spend for count time: %f' % ph1.count_time
- print 'time spend for init time: %f' % ph1.init_time
- print 'time spend for main_loop time: %f' % ph1.main_loop
- print 'time spend for splice_site_time time: %f' % ph1.splice_site_time
-
- print 'time spend for computeSpliceAlignWithQualityTime time: %f'% ph1.computeSpliceAlignWithQualityTime
- print 'time spend for computeSpliceWeightsTime time: %f'% ph1.computeSpliceWeightsTime
- print 'time spend for DotProdTime time: %f'% ph1.DotProdTime
- print 'time spend forarray_stuff time: %f'% ph1.array_stuff