+ added code for negative strand processing
[qpalma.git] / dyn_prog / result_align.cpp
1 #include "assert.h"
2 #include "fill_matrix.h"
3 #include "debug_tools.h"
4
5 using namespace std;
6
7 void increaseFeatureCount(penalty_struct* qparam, int dnaChar, int estChar, double estprb) {
8
9 FA(estChar > 0 && estChar < 6);
10 FA(dnaChar > -1 && dnaChar < 6);
11
12 int currentPos = (estChar-1)*6+dnaChar;
13
14 penalty_struct currentStruct = qparam[currentPos];
15
16 //printf("Current index %d est/dna: %d %d score %f\n",currentPos,estChar,dnaChar,estprb);
17
18 //if( estChar == 1 and dnaChar == 1) {
19 // printf("before\n");
20 // int p_idx;
21 // for(p_idx=0;p_idx<currentStruct.len;p_idx++) {
22 // printf("%f ",currentStruct.limits[p_idx]);
23 // }
24 // printf("\n");
25
26 // for(p_idx=0;p_idx<currentStruct.len;p_idx++) {
27 // printf("%f ",currentStruct.penalties[p_idx]);
28 // }
29 // printf("\n");
30 //}
31
32 double value = estprb;
33 int Lower = 0;
34 int idx;
35
36 for (idx=0;idx<currentStruct.len;idx++) {
37 if (currentStruct.limits[idx] <= value)
38 Lower++;
39 }
40
41 if (Lower == 0) {
42 currentStruct.penalties[0] += 1;
43 qparam[currentPos] = currentStruct;
44 return;
45 }
46
47 if (Lower == currentStruct.len) {
48 currentStruct.penalties[currentStruct.len-1] += 1;
49 qparam[currentPos] = currentStruct;
50 return;
51 }
52
53 Lower -= 1;
54 int Upper = Lower+1; // x-werte bleiben fest
55
56 double weightup = 1.0*(value - currentStruct.limits[Lower]) / (currentStruct.limits[Upper] - currentStruct.limits[Lower]);
57 double weightlow = 1.0*(currentStruct.limits[Upper] - value) / (currentStruct.limits[Upper] - currentStruct.limits[Lower]);
58 currentStruct.penalties[Upper] += weightup;
59 currentStruct.penalties[Lower] += weightlow;
60
61 //if( estChar == 1 and dnaChar == 1) {
62 // printf("estprb/Lower/Upper %f %d %d\n",estprb,Lower,Upper);
63 // int p_idx;
64 // printf("after\n");
65 // for(p_idx=0;p_idx<currentStruct.len;p_idx++) {
66 // printf("%f ",currentStruct.limits[p_idx]);
67 // }
68 // printf("\n");
69 // for(p_idx=0;p_idx<currentStruct.len;p_idx++)
70 // printf("%f ",currentStruct.penalties[p_idx]);
71 // printf("\n");
72 //}
73 qparam[currentPos] = currentStruct;
74 }
75
76 bool result_align(Pre_score* matrices[], int z, int est_len, int dna_len, int* result_length_ptr, char* est, char* dna, double* prb, double* chastity, int* s_align, int* e_align, int* mparam, double* alignmentscores, int* max_score_positions, penalty_struct* qparam, mode currentMode)
77 //Gibt 1 zurueck, wenn kein weiterer Pfad existiert, sonst 0
78
79 {
80 const int mlen=6; // length of matchmatrix
81 int startend[4] ; // [i_start, j_start, i_end, j_end] ;
82 startend[3] = max_score_positions[2*z] ; //i (est)
83 startend[4] = max_score_positions[2*z +1] ; //j (dna)
84 //printf("in result_align: z=%d:(%d, %d)\n", z, startend[3], startend[4]) ;
85 int dnanum ; //using check_char: letter -> number
86 int estnum ; //using check_char: letter -> number
87
88 double prbnum ;
89 double chastitynum ;
90
91 int dna_pos ;
92 int est_pos ;
93 int splice_state ;
94 int est_state ;
95 for (dna_pos=dna_len-1; dna_pos>startend[4]; dna_pos--) {
96 s_align[dna_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
97 }
98 for (est_pos=est_len-1; est_pos>startend[3]; est_pos--) {
99 e_align[est_pos-1] = 4 ;
100 }
101 //splice_align: 0: exon, 1: donor, 2: acceptor, 3: intro,. 4: dangling end
102
103 /* starting point for backtracking */
104 int i = startend[3] ;
105 int j = startend[4] ;
106 int act_m_no = z ;
107 alignmentscores[z] = ((Pre_score*)matrices[z] + i*dna_len +j)->value;
108 //printf("alignment_score for z=%d: %3.8f\n", z, alignmentscores[z]) ;
109
110 int prev_i = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_i;
111 int prev_j = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_j;
112 int prev_m_no = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_matrix_no;
113
114 //printf("Current z is %d\n",z);
115
116 if(!(isfinite(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value)))
117 {
118 cout << "No additional path left (inf)!\n";
119 return 1;
120 }
121
122 if(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value==0) {
123 cout << "No additional path left (0)!\n";
124 return 1;
125 }
126
127 assert(dna_pos == j) ;
128 assert(est_pos == i) ;
129
130 splice_state = 0 ; //exon
131 est_state = 1 ; //[112222111 ... for exons of length 2,4,3 ...
132
133 //compute length of alignment (between max(m,n) and m+n)
134 //local_alignment: backtracking to first Zero: look at actual value
135 while(!(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value)==0.0) {
136
137 //just some info
138 //if ((z>0) && (!(z==prev_m_no))) {
139 // printf("(%d,%d): next step in next matrix\n", i,j) ;
140 //}
141
142 if ((prev_i == (i-1)) && (prev_j == (j-1))) { // match or mismatch
143 (*result_length_ptr)= (*result_length_ptr) + 1;
144
145 s_align[dna_pos-1] = splice_state;
146 dna_pos-- ;
147 e_align[est_pos-1] = est_state ; //1 or 2, depended
148 est_pos-- ;
149
150 dnanum = check_char(dna[j-1]) ; //-1 because index starts with 0
151 estnum = check_char(est[i-1]) ; //-1 because index starts with 0
152
153 prbnum = prb[i-1];
154 chastitynum = chastity[i-1];
155
156 //printf("Position %d,%d est,dna: %d,%d\n",i,j,estnum,dnanum);
157
158 if(currentMode == USE_QUALITY_SCORES)
159 increaseFeatureCount(qparam,dnanum,estnum,prbnum);
160 else
161 mparam[mlen*dnanum+estnum] ++ ;
162
163 }
164 else if ((prev_i == (i)) && (prev_j == (j-1))) {// gap on EST
165 (*result_length_ptr)= (*result_length_ptr) + 1;
166
167 s_align[dna_pos-1] = splice_state;
168 dna_pos-- ;
169
170 dnanum = check_char(dna[j-1]) ; //-1 because index starts with 0
171 estnum = 0 ; //gap
172
173 if(currentMode == USE_QUALITY_SCORES)
174 mparam[dnanum] ++ ;
175 else
176 mparam[mlen*dnanum +estnum] ++ ;
177
178 }
179 else if ((prev_i == (i-1)) && (prev_j == (j))) { // gap on DNA
180 (*result_length_ptr)= (*result_length_ptr) + 1;
181
182 e_align[est_pos-1] = est_state ; //1 or 2, depended
183 est_pos-- ;
184
185 dnanum = 0 ; //gap
186 estnum = check_char(est[i-1]) ; //-1 because index starts with 0
187
188 prbnum = prb[i-1];
189 chastitynum = chastity[i-1];
190
191 //printf("Position %d,%d est,dna: %d,%d\n",i,j,estnum,dnanum);
192
193 if(currentMode == USE_QUALITY_SCORES)
194 increaseFeatureCount(qparam,dnanum,estnum,prbnum);
195 else
196 mparam[mlen*dnanum +estnum] ++ ;
197 }
198 else {// splice site
199 (*result_length_ptr) = (*result_length_ptr) + (j - prev_j);
200 if (((Pre_score*)matrices[act_m_no] + i*dna_len + j)->issplice==0) {
201 printf("matrices[act_m_no](i,j): %3.15f\n",((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value) ;
202 printf("matrices[prev_m_no](i,j): %3.15f\n",((Pre_score*)matrices[prev_m_no] + i*dna_len +j)->value) ;
203 printf("issplice(act): %d\n",((Pre_score*)matrices[act_m_no] + i*dna_len + j)->issplice) ;
204 printf("issplice(prev): %d\n",((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->issplice) ;
205 printf("act_m_no: %d, prev_m_no: %d\n", act_m_no, prev_m_no) ;
206 printf("j: %d -> prev_j:%d\n", j, prev_j) ;
207 printf("i: %d -> prev_i:%d\n", i, prev_i) ;
208 }
209 if (est_state==1) { //was exon labeled "1"
210 est_state = 2 ; //new exon is labeled "2"
211 }
212 else {
213 est_state = 1 ; //last exon was labeled "2", new exon is labeled "1"
214 }
215 for (j; j > prev_j; j--) {
216 if (splice_state == 0) { //coming from exon
217 splice_state = 2 ; //first intron_state: acceptor
218 } else {
219 splice_state = 3 ; //intron
220 }
221 if (j-1 == prev_j) { //last intron_state: donor
222 splice_state = 1 ;//donor
223 }
224 s_align[dna_pos-1] = splice_state;
225 dna_pos-- ;
226 }
227 splice_state = 0 ; //exon again
228
229 }
230
231 startend[1] = i;
232 startend[2] = j;
233 i = prev_i;
234 j = prev_j;
235 act_m_no = prev_m_no ;
236
237 prev_i = ((Pre_score*)matrices[act_m_no] + i*dna_len + j)->prev_i;
238 prev_j = ((Pre_score*)matrices[act_m_no] + i*dna_len + j)->prev_j;
239 prev_m_no = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_matrix_no;
240 }
241 for (dna_pos; dna_pos>0; dna_pos--) {
242 s_align[dna_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
243 }
244 for (est_pos; est_pos>0; est_pos--) {
245 e_align[est_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
246 }
247
248 //printf("(%d, %d) --> (%d, %d)\n", startend[1], startend[2], startend[3], startend[4]) ;
249 //printf("result_length: %d\n", *result_length_ptr) ;
250 return 0;
251 }