import qparser
+from Utils import pprint_alignment
+
+import palma.palma_utils as pu
+from palma.output_formating import print_results
+
+
+def alignment_reconstruct(current_prediction,num_exons):
+ """
+ We reconstruct the exact alignment to infer the positions of indels and the
+ sizes of the respective exons.
+ """
+
+ SpliceAlign = current_prediction['spliceAlign']
+ estAlign = current_prediction['estAlign']
+
+ dna_array = current_prediction['dna_array']
+ est_array = current_prediction['est_array']
+
+ dna_array = "".join(map(lambda x: str(x),dna_array))
+ est_array = "".join(map(lambda x: str(x),est_array))
+
+ translation = '-ACGTN_' #how aligned est and dna sequence is displayed
+ #(gap_char, 5 nucleotides, intron_char)
+
+ # this array hold a number for each exons indicating its number of matches
+ exonIdentities = [0]*num_exons
+ exonGaps = [0]*num_exons
+
+ est_array = map(lambda x: translation[int(x)],est_array)
+ dna_array = map(lambda x: translation[int(x)],dna_array)
+
+ gaps = 0
+ identity = 0
+ idx = 0
+
+ #if num_exons > 1:
+ # pdb.set_trace()
+
+ for ridx in range(len(est_array)):
+ read_elem = est_array[ridx]
+ dna_elem = dna_array[ridx]
+
+ if ridx > 0 and est_array[ridx-1] != '_' and est_array[ridx] == '_':
+ idx += 1
+ gaps = 0
+ identity = 0
+
+ if read_elem == '_':
+ continue
+
+ if read_elem == dna_elem:
+ identity += 1
+
+ if read_elem == '-':
+ gaps += 1
+
+ exonIdentities[idx] = identity
+ exonGaps[idx] = gaps
+
+ return exonIdentities,exonGaps
+
+
def create_alignment_file(current_loc,out_fname):
import numpy
#qparser.parse_reads(filtered_reads)
out_fh = open(out_fname,'w+')
allPredictions = cPickle.load(open(current_loc))
+ #allPredictions = current_loc
allPositions = {}
predExons = current_prediction['predExons']
predExons = [e+start_pos for e in predExons]
-
(qStart, qEnd, tStart, tEnd, num_exons, qExonSizes, qStarts, qEnds,\
tExonSizes,tStarts, tEnds) = alignment
-
EST2GFF = False
+ new_line = ''
+
if EST2GFF:
predExons = numpy.mat(predExons).reshape(len(predExons)/2,2)
# &strand, Qname, &Qsize,
# &Qstart, &Qend, Tname, &Tsize,
# &Tstart, &Tend, &blockcnt,exonsizes_str,Qstarts_str,Tstarts_str
-
#
# ATTENTION
#
Qstarts_str = str(qStarts)[1:-1].replace(' ','')
Tstarts_str = str(map(lambda x: x+start_pos,tStarts))[1:-1].replace(' ','')
- try:
- new_line = '%d\t%d\t%s\t%s\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t%s\t%s\t%s\t%s\t%s\t%s\n' %\
+ new_line = "%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%c\t%s\t%d\t%d\t%d\t%s\t%d\t%d\t%d\t%d\t%s\t%s\t%s\n"\
+ % (match, mismatch, repmatch, Ns, Qgapcnt, Qgapbs,\
+ Tgapcnt, Tgapbs, strand, Qname, Qsize, Qstart, Qend,\
+ Tname, Tsize, Tstart, Tend,\
+ blockcnt,exonsizes_str,Qstarts_str,Tstarts_str) #str(predExons))
+
+ else:
+
+ num_exons = len(qExonSizes)
+ exonIdentities,exonGaps = alignment_reconstruct(current_prediction,num_exons)
+
+ pp = lambda x : str(x)[1:-1].replace(' ','')
+
+ new_line = '%d\t%d\t%s\t%s\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' %\
(id,chromo,strand,seq,str(q1)[1:-1],start_pos,qStart,qEnd,tStart,tEnd,num_exons,\
str(qExonSizes)[1:-1].replace(' ',''),str(qStarts)[1:-1].replace(' ',''),\
str(qEnds)[1:-1].replace(' ',''),str(tExonSizes)[1:-1].replace(' ',''),\
- str(tStarts)[1:-1].replace(' ',''),str(tEnds)[1:-1].replace(' ',''))
- except:
- pdb.set_trace()
-
- #new_line = "%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%c\t%s\t%d\t%d\t%d\t%s\t%d\t%d\t%d\t%d\t%s\t%s\t%s\n"\
- #% (match, mismatch, repmatch, Ns, Qgapcnt, Qgapbs,\
- #Tgapcnt, Tgapbs, strand, Qname, Qsize, Qstart, Qend,\
- #Tname, Tsize, Tstart, Tend,\
- #blockcnt,exonsizes_str,Qstarts_str,Tstarts_str) #str(predExons))
+ str(tStarts)[1:-1].replace(' ',''),str(tEnds)[1:-1].replace(' ',''),\
+ str(exonIdentities)[1:-1].replace(' ',''),str(exonGaps)[1:-1].replace(' ',''))
out_fh.write(new_line)
return allPositions
+
if __name__ == '__main__':
current_dir = sys.argv[1]
out_fh = sys.argv[2]