+ renamed files
[qpalma.git] / dyn_prog / result_align.cpp
1 #include "assert.h"
2 #include "fill_matrix.h"
3 #include "debug_tools.h"
4
5 using namespace std;
6
7 void increaseFeatureCount(penalty_struct* qparam, int dnaChar, int estChar, double estprb) {
8
9 FA(estChar > 0 && estChar < 6);
10 FA(dnaChar > -1 && dnaChar < 6);
11
12 int currentPos = (estChar-1)*6+dnaChar;
13
14 penalty_struct currentStruct = qparam[currentPos];
15
16 //printf("Current index %d est/dna: %d %d score %f\n",currentPos,estChar,dnaChar,estprb);
17
18 //printf("before\n");
19 //int p_idx;
20 //for(p_idx=0;p_idx<currentStruct.len;p_idx++) {
21 // printf("%f ",currentStruct.limits[p_idx]);
22 //}
23 //printf("\n");
24
25 //for(p_idx=0;p_idx<currentStruct.len;p_idx++) {
26 // printf("%f ",currentStruct.penalties[p_idx]);
27 //}
28 //printf("\n");
29
30 double value = estprb;
31 int Lower = 0;
32 int idx;
33
34 for (idx=0;idx<currentStruct.len;idx++) {
35 if (currentStruct.limits[idx] <= value)
36 Lower++;
37 }
38
39 if (Lower == 0) {
40 currentStruct.penalties[0] += 1;
41 qparam[currentPos] = currentStruct;
42 return;
43 }
44
45 if (Lower == currentStruct.len) {
46 currentStruct.penalties[currentStruct.len-1] += 1;
47 qparam[currentPos] = currentStruct;
48 return;
49 }
50
51 Lower -= 1;
52 int Upper = Lower+1; // x-werte bleiben fest
53
54 double weightup = 1.0*(value - currentStruct.limits[Lower]) / (currentStruct.limits[Upper] - currentStruct.limits[Lower]);
55 double weightlow = 1.0*(currentStruct.limits[Upper] - value) / (currentStruct.limits[Upper] - currentStruct.limits[Lower]);
56 currentStruct.penalties[Upper] += weightup;
57 currentStruct.penalties[Lower] += weightlow;
58
59 //printf("estprb/Lower/Upper %f %d %d\n",estprb,Lower,Upper);
60 //printf("after\n");
61 //for(p_idx=0;p_idx<currentStruct.len;p_idx++) {
62 // printf("%f ",currentStruct.limits[p_idx]);
63 //}
64 //printf("\n");
65 //for(p_idx=0;p_idx<currentStruct.len;p_idx++)
66 // printf("%f ",currentStruct.penalties[p_idx]);
67 //printf("\n");
68 qparam[currentPos] = currentStruct;
69 }
70
71 bool result_align(Pre_score* matrices[], int z, int est_len, int dna_len, int* result_length_ptr, char* est, char* dna, double* prb, double* chastity, int* s_align, int* e_align, int* mparam, double* alignmentscores, int* max_score_positions, penalty_struct* qparam, mode currentMode)
72 //Gibt 1 zurueck, wenn kein weiterer Pfad existiert, sonst 0
73
74 {
75 const int mlen=6; // length of matchmatrix
76 int startend[4] ; // [i_start, j_start, i_end, j_end] ;
77 startend[3] = max_score_positions[2*z] ; //i (est)
78 startend[4] = max_score_positions[2*z +1] ; //j (dna)
79 //printf("in result_align: z=%d:(%d, %d)\n", z, startend[3], startend[4]) ;
80 int dnanum ; //using check_char: letter -> number
81 int estnum ; //using check_char: letter -> number
82
83 double prbnum ;
84 double chastitynum ;
85
86 int dna_pos ;
87 int est_pos ;
88 int splice_state ;
89 int est_state ;
90 for (dna_pos=dna_len-1; dna_pos>startend[4]; dna_pos--) {
91 s_align[dna_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
92 }
93 for (est_pos=est_len-1; est_pos>startend[3]; est_pos--) {
94 e_align[est_pos-1] = 4 ;
95 }
96 //splice_align: 0: exon, 1: donor, 2: acceptor, 3: intro,. 4: dangling end
97
98 /* starting point for backtracking */
99 int i = startend[3] ;
100 int j = startend[4] ;
101 int act_m_no = z ;
102 alignmentscores[z] = ((Pre_score*)matrices[z] + i*dna_len +j)->value;
103 //printf("alignment_score for z=%d: %3.8f\n", z, alignmentscores[z]) ;
104
105 int prev_i = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_i;
106 int prev_j = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_j;
107 int prev_m_no = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_matrix_no;
108
109 //printf("Current z is %d\n",z);
110
111 if(!(isfinite(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value)))
112 {
113 cout << "No additional path left (inf)!\n";
114 return 1;
115 }
116
117 if(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value==0) {
118 cout << "No additional path left (0)!\n";
119 return 1;
120 }
121
122 assert(dna_pos == j) ;
123 assert(est_pos == i) ;
124
125 splice_state = 0 ; //exon
126 est_state = 1 ; //[112222111 ... for exons of length 2,4,3 ...
127
128 //compute length of alignment (between max(m,n) and m+n)
129 //local_alignment: backtracking to first Zero: look at actual value
130 while(!(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value)==0.0) {
131
132 //just some info
133 //if ((z>0) && (!(z==prev_m_no))) {
134 // printf("(%d,%d): next step in next matrix\n", i,j) ;
135 //}
136
137 if ((prev_i == (i-1)) && (prev_j == (j-1))) { // match or mismatch
138 (*result_length_ptr)= (*result_length_ptr) + 1;
139
140 s_align[dna_pos-1] = splice_state;
141 dna_pos-- ;
142 e_align[est_pos-1] = est_state ; //1 or 2, depended
143 est_pos-- ;
144
145 dnanum = check_char(dna[j-1]) ; //-1 because index starts with 0
146 estnum = check_char(est[i-1]) ; //-1 because index starts with 0
147
148 prbnum = prb[i-1];
149 chastitynum = chastity[i-1];
150
151 //printf("Position %d,%d est,dna: %d,%d\n",i,j,estnum,dnanum);
152
153 if(currentMode == USE_QUALITY_SCORES)
154 increaseFeatureCount(qparam,dnanum,estnum,prbnum);
155 else
156 mparam[mlen*dnanum+estnum] ++ ;
157
158 }
159 else if ((prev_i == (i)) && (prev_j == (j-1))) {// gap on EST
160 (*result_length_ptr)= (*result_length_ptr) + 1;
161
162 s_align[dna_pos-1] = splice_state;
163 dna_pos-- ;
164
165 dnanum = check_char(dna[j-1]) ; //-1 because index starts with 0
166 estnum = 0 ; //gap
167
168 if(currentMode == USE_QUALITY_SCORES)
169 mparam[dnanum] ++ ;
170 else
171 mparam[mlen*dnanum +estnum] ++ ;
172
173 }
174 else if ((prev_i == (i-1)) && (prev_j == (j))) { // gap on DNA
175 (*result_length_ptr)= (*result_length_ptr) + 1;
176
177 e_align[est_pos-1] = est_state ; //1 or 2, depended
178 est_pos-- ;
179
180 dnanum = 0 ; //gap
181 estnum = check_char(est[i-1]) ; //-1 because index starts with 0
182
183 prbnum = prb[i-1];
184 chastitynum = chastity[i-1];
185
186 //printf("Position %d,%d est,dna: %d,%d\n",i,j,estnum,dnanum);
187
188 if(currentMode == USE_QUALITY_SCORES)
189 increaseFeatureCount(qparam,dnanum,estnum,prbnum);
190 else
191 mparam[mlen*dnanum +estnum] ++ ;
192 }
193 else {// splice site
194 (*result_length_ptr) = (*result_length_ptr) + (j - prev_j);
195 if (((Pre_score*)matrices[act_m_no] + i*dna_len + j)->issplice==0) {
196 printf("matrices[act_m_no](i,j): %3.15f\n",((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value) ;
197 printf("matrices[prev_m_no](i,j): %3.15f\n",((Pre_score*)matrices[prev_m_no] + i*dna_len +j)->value) ;
198 printf("issplice(act): %d\n",((Pre_score*)matrices[act_m_no] + i*dna_len + j)->issplice) ;
199 printf("issplice(prev): %d\n",((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->issplice) ;
200 printf("act_m_no: %d, prev_m_no: %d\n", act_m_no, prev_m_no) ;
201 printf("j: %d -> prev_j:%d\n", j, prev_j) ;
202 printf("i: %d -> prev_i:%d\n", i, prev_i) ;
203 }
204 if (est_state==1) { //was exon labeled "1"
205 est_state = 2 ; //new exon is labeled "2"
206 }
207 else {
208 est_state = 1 ; //last exon was labeled "2", new exon is labeled "1"
209 }
210 for (j; j > prev_j; j--) {
211 if (splice_state == 0) { //coming from exon
212 splice_state = 2 ; //first intron_state: acceptor
213 } else {
214 splice_state = 3 ; //intron
215 }
216 if (j-1 == prev_j) { //last intron_state: donor
217 splice_state = 1 ;//donor
218 }
219 s_align[dna_pos-1] = splice_state;
220 dna_pos-- ;
221 }
222 splice_state = 0 ; //exon again
223
224 }
225
226 startend[1] = i;
227 startend[2] = j;
228 i = prev_i;
229 j = prev_j;
230 act_m_no = prev_m_no ;
231
232 prev_i = ((Pre_score*)matrices[act_m_no] + i*dna_len + j)->prev_i;
233 prev_j = ((Pre_score*)matrices[act_m_no] + i*dna_len + j)->prev_j;
234 prev_m_no = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_matrix_no;
235 }
236 for (dna_pos; dna_pos>0; dna_pos--) {
237 s_align[dna_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
238 }
239 for (est_pos; est_pos>0; est_pos--) {
240 e_align[est_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
241 }
242
243 //printf("(%d, %d) --> (%d, %d)\n", startend[1], startend[2], startend[3], startend[4]) ;
244 //printf("result_length: %d\n", *result_length_ptr) ;
245 return 0;
246 }