+ fixded index bug in feature count
[qpalma.git] / QPalmaDP / result_align.cpp
1 #include "assert.h"
2 #include "fill_matrix.h"
3
4 using namespace std;
5
6 void increaseFeatureCount(penalty_struct* qparam, int dnanum, int estnum, double estprb) {
7
8 //printf("Current index %d dna/est: %d %d\n",(estnum-1)*6+(dnanum),dnanum,estnum);
9 int currentPos = (estnum-1)*6+(dnanum);
10 penalty_struct currentStruct = qparam[currentPos];
11
12 //printf("before\n");
13 //int p_idx;
14 //for(p_idx=0;p_idx<currentStruct.len;p_idx++) {
15 // printf("%f ",currentStruct.limits[p_idx]);
16 //}
17 //printf("\n");
18
19 //for(p_idx=0;p_idx<currentStruct.len;p_idx++) {
20 // printf("%f ",currentStruct.penalties[p_idx]);
21 //}
22 //printf("\n");
23
24 double value = estprb;
25 int Lower = 0;
26 int idx;
27
28 for (idx=0;idx<currentStruct.len;idx++) {
29 if (currentStruct.limits[idx] <= value)
30 Lower++;
31 }
32
33 if (Lower == 0) {
34 currentStruct.penalties[0] += 1;
35 return;
36 }
37
38 if (Lower == (currentStruct.len-1)) {
39 currentStruct.penalties[currentStruct.len-1] += 1;
40 return;
41 }
42
43 Lower -= 1;
44 int Upper = Lower+1; // x-werte bleiben fest
45
46 double weightup = 1.0*(value - currentStruct.limits[Lower]) / (currentStruct.limits[Upper] - currentStruct.limits[Lower]);
47 double weightlow = 1.0*(currentStruct.limits[Upper] - value) / (currentStruct.limits[Upper] - currentStruct.limits[Lower]);
48 currentStruct.penalties[Upper] += weightup;
49 currentStruct.penalties[Lower] += weightlow;
50
51 //printf("estprb/Lower/Upper %f %d %d\n",estprb,Lower,Upper);
52
53 qparam[currentPos] = currentStruct;
54
55 //printf("after\n");
56 //for(p_idx=0;p_idx<currentStruct.len;p_idx++) {
57 // printf("%f ",currentStruct.limits[p_idx]);
58 //}
59 //printf("\n");
60 //for(p_idx=0;p_idx<currentStruct.len;p_idx++)
61 // printf("%f ",currentStruct.penalties[p_idx]);
62 //printf("\n");
63 }
64
65 bool result_align(Pre_score* matrices[], int z, int est_len, int dna_len, int* result_length_ptr, char* est, char* dna, double* prb, double* chastity, int* s_align, int* e_align, int* mparam, double* alignmentscores, int* max_score_positions, penalty_struct* qparam, mode currentMode)
66 //Gibt 1 zurueck, wenn kein weiterer Pfad existiert, sonst 0
67
68 {
69 const int mlen=6; // length of matchmatrix
70 int startend[4] ; // [i_start, j_start, i_end, j_end] ;
71 startend[3] = max_score_positions[2*z] ; //i (est)
72 startend[4] = max_score_positions[2*z +1] ; //j (dna)
73 //printf("in result_align: z=%d:(%d, %d)\n", z, startend[3], startend[4]) ;
74 int dnanum ; //using check_char: letter -> number
75 int estnum ; //using check_char: letter -> number
76
77 double prbnum ;
78 double chastitynum ;
79
80
81 int dna_pos ;
82 int est_pos ;
83 int splice_state ;
84 int est_state ;
85 for (dna_pos=dna_len-1; dna_pos>startend[4]; dna_pos--) {
86 s_align[dna_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
87 }
88 for (est_pos=est_len-1; est_pos>startend[3]; est_pos--) {
89 e_align[est_pos-1] = 4 ;
90 }
91 //splice_align: 0: exon, 1: donor, 2: acceptor, 3: intro,. 4: dangling end
92
93
94 /* starting point for backtracking */
95 int i = startend[3] ;
96 int j = startend[4] ;
97 int act_m_no = z ;
98 alignmentscores[z] = ((Pre_score*)matrices[z] + i*dna_len +j)->value;
99 //printf("alignment_score for z=%d: %3.8f\n", z, alignmentscores[z]) ;
100
101 int prev_i = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_i;
102 int prev_j = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_j;
103 int prev_m_no = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_matrix_no;
104
105 if(!(isfinite(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value)))
106 {
107 cout << "Kein weiterer Pfad vorhanden\n";
108 return 1;
109 }
110
111 if(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value==0) {
112 cout << "Kein weiterer Pfad vorhanden\n";
113 return 1;
114 }
115
116 assert(dna_pos == j) ;
117 assert(est_pos == i) ;
118 splice_state = 0 ; //exon
119 est_state = 1 ; //[112222111 ... for exons of length 2,4,3 ...
120
121
122 //compute length of alignment (between max(m,n) and m+n)
123 //local_alignment: backtracking to first Zero: look at actual value
124 while(!(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value)==0.0) {
125
126 //just some info
127 //if ((z>0) && (!(z==prev_m_no))) {
128 // printf("(%d,%d): next step in next matrix\n", i,j) ;
129 //}
130
131 if ((prev_i == (i-1)) && (prev_j == (j-1))) { // match or mismatch
132 (*result_length_ptr)= (*result_length_ptr) + 1;
133
134 s_align[dna_pos-1] = splice_state;
135 dna_pos-- ;
136 e_align[est_pos-1] = est_state ; //1 or 2, depended
137 est_pos-- ;
138
139 dnanum = check_char(dna[j-1]) ; //-1 because index starts with 0
140 estnum = check_char(est[i-1]) ; //-1 because index starts with 0
141
142 prbnum = prb[i-1];
143 chastitynum = chastity[i-1];
144
145 if(currentMode == USE_QUALITY_SCORES)
146 increaseFeatureCount(qparam,dnanum,estnum,prbnum);
147 else
148 mparam[mlen*dnanum+estnum] ++ ;
149
150 }
151 else if ((prev_i == (i)) && (prev_j == (j-1))) {// gap on EST
152 (*result_length_ptr)= (*result_length_ptr) + 1;
153
154 s_align[dna_pos-1] = splice_state;
155 dna_pos-- ;
156
157 dnanum = check_char(dna[j-1]) ; //-1 because index starts with 0
158 estnum = 0 ; //gap
159 mparam[mlen*dnanum +estnum] ++ ;
160 }
161 else if ((prev_i == (i-1)) && (prev_j == (j))) { // gap on DNA
162 (*result_length_ptr)= (*result_length_ptr) + 1;
163
164 e_align[est_pos-1] = est_state ; //1 or 2, depended
165 est_pos-- ;
166
167 dnanum = 0 ; //gap
168 estnum = check_char(est[i-1]) ; //-1 because index starts with 0
169
170 prbnum = prb[i-1];
171 chastitynum = chastity[i-1];
172
173 if(currentMode == USE_QUALITY_SCORES)
174 increaseFeatureCount(qparam,dnanum,estnum,prbnum);
175 else
176 mparam[mlen*dnanum +estnum] ++ ;
177 }
178 else {// splice site
179 (*result_length_ptr) = (*result_length_ptr) + (j - prev_j);
180 if (((Pre_score*)matrices[act_m_no] + i*dna_len + j)->issplice==0) {
181 printf("matrices[act_m_no](i,j): %3.15f\n",((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value) ;
182 printf("matrices[prev_m_no](i,j): %3.15f\n",((Pre_score*)matrices[prev_m_no] + i*dna_len +j)->value) ;
183 printf("issplice(act): %d\n",((Pre_score*)matrices[act_m_no] + i*dna_len + j)->issplice) ;
184 printf("issplice(prev): %d\n",((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->issplice) ;
185 printf("act_m_no: %d, prev_m_no: %d\n", act_m_no, prev_m_no) ;
186 printf("j: %d -> prev_j:%d\n", j, prev_j) ;
187 printf("i: %d -> prev_i:%d\n", i, prev_i) ;
188 }
189 if (est_state==1) { //was exon labeled "1"
190 est_state = 2 ; //new exon is labeled "2"
191 }
192 else {
193 est_state = 1 ; //last exon was labeled "2", new exon is labeled "1"
194 }
195 for (j; j > prev_j; j--) {
196 if (splice_state == 0) { //coming from exon
197 splice_state = 2 ; //first intron_state: acceptor
198 } else {
199 splice_state = 3 ; //intron
200 }
201 if (j-1 == prev_j) { //last intron_state: donor
202 splice_state = 1 ;//donor
203 }
204 s_align[dna_pos-1] = splice_state;
205 dna_pos-- ;
206 }
207 splice_state = 0 ; //exon again
208
209 }
210
211 startend[1] = i;
212 startend[2] = j;
213 i = prev_i;
214 j = prev_j;
215 act_m_no = prev_m_no ;
216
217 prev_i = ((Pre_score*)matrices[act_m_no] + i*dna_len + j)->prev_i;
218 prev_j = ((Pre_score*)matrices[act_m_no] + i*dna_len + j)->prev_j;
219 prev_m_no = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_matrix_no;
220 }
221 for (dna_pos; dna_pos>0; dna_pos--) {
222 s_align[dna_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
223 }
224 for (est_pos; est_pos>0; est_pos--) {
225 e_align[est_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
226 }
227
228 //printf("(%d, %d) --> (%d, %d)\n", startend[1], startend[2], startend[3], startend[4]) ;
229 //printf("result_length: %d\n", *result_length_ptr) ;
230
231
232 return 0;
233 }