git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@8640 e1793c9e...
[qpalma.git] / scripts / Utils.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from numpy.matlib import mat,zeros,ones,inf
5
6 import palma.palma_utils as pu
7 from palma.output_formating import print_results
8
9 def calc_info(acc,don,exons,qualities):
10 min_score = 100
11 max_score = -100
12
13 for elem in acc:
14 for score in elem:
15 if score != -inf:
16 min_score = min(min_score,score)
17 max_score = max(max_score,score)
18
19 print 'Acceptors max/min: %f / %f' % (max_score,min_score)
20
21 min_score = 100
22 max_score = -100
23
24 for elem in don:
25 for score in elem:
26 if score != -inf:
27 min_score = min(min_score,score)
28 max_score = max(max_score,score)
29
30 print 'Donor max/min: %f / %f' % (max_score,min_score)
31
32 min_score = 10000
33 max_score = 0
34 mean_intron_len = 0
35
36 for elem in exons:
37 intron_len = int(elem[1,0] - elem[0,1])
38 mean_intron_len += intron_len
39 min_score = min(min_score,intron_len)
40 max_score = max(max_score,intron_len)
41
42 mean_intron_len /= 1.0*len(exons)
43 print 'Intron length max/min: %d / %d mean length %f' % (max_score,min_score,mean_intron_len)
44
45 min_score = 10000
46 max_score = 0
47 mean_quality = 0
48
49 for qVec in qualities:
50 for elem in qVec:
51 min_score = min(min_score,elem)
52 max_score = max(max_score,elem)
53
54 print 'Quality values max/min: %d / %d mean' % (max_score,min_score)
55
56
57 def calc_stat(Acceptor,Donor,Exons):
58 maxAcc = -100
59 minAcc = 100
60 maxDon = -100
61 minDon = 100
62
63 acc_vec_pos = []
64 acc_vec_neg = []
65 don_vec_pos = []
66 don_vec_neg = []
67
68 for jdx in range(len(Acceptors)):
69 currentExons = Exons[jdx]
70 currentAcceptor = Acceptors[jdx]
71 currentAcceptor = currentAcceptor[1:]
72 currentAcceptor.append(-inf)
73 currentDonor = Donors[jdx]
74
75 for idx,elem in enumerate(currentAcceptor):
76 if idx == (int(currentExons[1,0])-1): # acceptor site
77 acc_vec_pos.append(elem)
78 else:
79 acc_vec_neg.append(elem)
80
81 for idx,elem in enumerate(currentDonor):
82 if idx == (int(currentExons[0,1])): # donor site
83 don_vec_pos.append(elem)
84 else:
85 don_vec_neg.append(elem)
86
87 acc_pos = [elem for elem in acc_vec_pos if elem != -inf]
88 acc_neg = [elem for elem in acc_vec_neg if elem != -inf]
89 don_pos = [elem for elem in don_vec_pos if elem != -inf]
90 don_neg = [elem for elem in don_vec_neg if elem != -inf]
91
92 for idx in range(len(Sequences)):
93 acc = [elem for elem in Acceptors[idx] if elem != -inf]
94 maxAcc = max(max(acc),maxAcc)
95 minAcc = min(min(acc),minAcc)
96
97 don = [elem for elem in Donors[idx] if elem != -inf]
98 maxDon = max(max(don),maxDon)
99 minDon = min(min(don),minDon)
100
101
102 def pprint_alignment(_newSpliceAlign,_newEstAlign, dna_array, est_array):
103 (qStart, qEnd, tStart, tEnd, num_exons, qExonSizes, qStarts, qEnds, tExonSizes, tStarts, tEnds) =\
104 pu.get_splice_info(_newSpliceAlign,_newEstAlign)
105
106 t_strand = '+'
107 translation = '-ACGTN_' #how aligned est and dna sequence is displayed
108 #(gap_char, 5 nucleotides, intron_char)
109 comparison_char = '| ' #first: match_char, second: intron_char
110
111 (exon_length, identity, ssQuality, exonQuality, comparison, qIDX, tIDX) =\
112 pu.comp_identity(dna_array, est_array, t_strand, qStart, tStart, translation, comparison_char)
113
114 first_identity = None
115 last_identity = None
116 for i in range(len(comparison)):
117 if comparison[i] == '|' and first_identity is None:
118 first_identity = i
119 if comparison[-i] == '|' and last_identity is None:
120 last_identity = len(comparison) - i - 1
121
122 try:
123 for idx in range(len(dna_array)):
124 dna_array[idx] = translation[int(dna_array[idx])]
125 est_array[idx] = translation[int(est_array[idx])]
126 except:
127 pdb.set_trace()
128
129 line1 = "".join(dna_array)
130 line2 = comparison
131 line3 = "".join(est_array)
132
133 return line1,line2,line3
134
135
136 def get_alignment(_newSpliceAlign,_newEstAlign, dna_array, est_array):
137 return pu.get_splice_info(_newSpliceAlign,_newEstAlign)