+ fixed some index bugs in the c code
[qpalma.git] / QPalmaDP / result_align.cpp
1 #include "assert.h"
2 #include "fill_matrix.h"
3
4 using namespace std;
5
6 void increaseFeatureCount(penalty_struct* qparam, int dnanum, int estnum, double estprb) {
7
8 if(dnanum == 0) {
9 //printf("Zero entry\n");
10 return;
11 }
12
13 penalty_struct currentStruct = qparam[(estnum-1)*5+(dnanum-1)];
14
15 // printf("before\n");
16 // int p_idx;
17 // for(p_idx=0;p_idx<10;p_idx++) {
18 // printf("%f ",currentStruct.limits[p_idx]);
19 // }
20 // printf("\n");
21 //
22 // for(p_idx=0;p_idx<10;p_idx++) {
23 // printf("%f ",currentStruct.penalties[p_idx]);
24 // }
25 // printf("\n");
26
27 double value = estprb;
28 int Lower = 0;
29 int idx;
30 for (idx=0;idx<currentStruct.len;idx++) {
31 if (currentStruct.limits[idx] <= value)
32 Lower++;
33 }
34
35 if (Lower == 0)
36 currentStruct.penalties[0] += 1;
37
38 if (Lower == currentStruct.len)
39 currentStruct.penalties[currentStruct.len-1] += 1;
40
41 Lower -= 1;
42 int Upper = Lower+1; // x-werte bleiben fest
43 currentStruct.penalties[Upper] += 1;
44 currentStruct.penalties[Lower] += 1;
45
46 //printf("estprb/Lower/Upper %f %d %d\n",estprb,Lower,Upper);
47
48 qparam[(estnum-1)*5+(dnanum-1)] = currentStruct;
49
50 //printf("after\n");
51 //for(p_idx=0;p_idx<10;p_idx++) {
52 // printf("%f ",currentStruct.limits[p_idx]);
53 //}
54 //printf("\n");
55 //for(p_idx=0;p_idx<10;p_idx++)
56 // printf("%f ",currentStruct.penalties[p_idx]);
57 //printf("\n");
58 }
59
60 bool result_align(Pre_score* matrices[], int z, int est_len, int dna_len, int* result_length_ptr, char* est, char* dna, double* prb, double* chastity, int* s_align, int* e_align, int* mparam, double* alignmentscores, int* max_score_positions, penalty_struct* qparam)
61 //Gibt 1 zurueck, wenn kein weiterer Pfad existiert, sonst 0
62
63 {
64 const int mlen=6; // length of matchmatrix
65 int startend[4] ; // [i_start, j_start, i_end, j_end] ;
66 startend[3] = max_score_positions[2*z] ; //i (est)
67 startend[4] = max_score_positions[2*z +1] ; //j (dna)
68 //printf("in result_align: z=%d:(%d, %d)\n", z, startend[3], startend[4]) ;
69 int dnanum ; //using check_char: letter -> number
70 int estnum ; //using check_char: letter -> number
71
72 double prbnum ;
73 double chastitynum ;
74
75 int dna_pos ;
76 int est_pos ;
77 int splice_state ;
78 int est_state ;
79 for (dna_pos=dna_len-1; dna_pos>startend[4]; dna_pos--) {
80 s_align[dna_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
81 }
82 for (est_pos=est_len-1; est_pos>startend[3]; est_pos--) {
83 e_align[est_pos-1] = 4 ;
84 }
85 //splice_align: 0: exon, 1: donor, 2: acceptor, 3: intro,. 4: dangling end
86
87
88 /* starting point for backtracking */
89 int i = startend[3] ;
90 int j = startend[4] ;
91 int act_m_no = z ;
92 alignmentscores[z] = ((Pre_score*)matrices[z] + i*dna_len +j)->value;
93 //printf("alignment_score for z=%d: %3.8f\n", z, alignmentscores[z]) ;
94
95 int prev_i = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_i;
96 int prev_j = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_j;
97 int prev_m_no = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_matrix_no;
98
99 if(!(isfinite(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value)))
100 {
101 cout << "Kein weiterer Pfad vorhanden\n";
102 return 1;
103 }
104
105 if(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value==0) {
106 cout << "Kein weiterer Pfad vorhanden\n";
107 return 1;
108 }
109
110 assert(dna_pos == j) ;
111 assert(est_pos == i) ;
112 splice_state = 0 ; //exon
113 est_state = 1 ; //[112222111 ... for exons of length 2,4,3 ...
114
115 //compute length of alignment (between max(m,n) and m+n)
116 //local_alignment: backtracking to first Zero: look at actual value
117 while(!(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value)==0.0) {
118
119 //just some info
120 //if ((z>0) && (!(z==prev_m_no))) {
121 // printf("(%d,%d): next step in next matrix\n", i,j) ;
122 //}
123
124 if ((prev_i == (i-1)) && (prev_j == (j-1))) { // match or mismatch
125 (*result_length_ptr)= (*result_length_ptr) + 1;
126
127 s_align[dna_pos-1] = splice_state;
128 dna_pos-- ;
129 e_align[est_pos-1] = est_state ; //1 or 2, depended
130 est_pos-- ;
131
132 dnanum = check_char(dna[j-1]) ; //-1 because index starts with 0
133 estnum = check_char(est[i-1]) ; //-1 because index starts with 0
134
135 prbnum = prb[i-1];
136 chastitynum = chastity[i-1];
137 increaseFeatureCount(qparam,dnanum,estnum,prbnum);
138
139 mparam[mlen*dnanum +estnum] ++ ;
140 }
141 else if ((prev_i == (i)) && (prev_j == (j-1))) {// gap on EST
142 (*result_length_ptr)= (*result_length_ptr) + 1;
143
144 s_align[dna_pos-1] = splice_state;
145 dna_pos-- ;
146
147 dnanum = check_char(dna[j-1]) ; //-1 because index starts with 0
148 estnum = 0 ; //gap
149 mparam[mlen*dnanum +estnum] ++ ;
150 }
151 else if ((prev_i == (i-1)) && (prev_j == (j))) { // gap on DNA
152 (*result_length_ptr)= (*result_length_ptr) + 1;
153
154 e_align[est_pos-1] = est_state ; //1 or 2, depended
155 est_pos-- ;
156
157 dnanum = 0 ; //gap
158 estnum = check_char(est[i-1]) ; //-1 because index starts with 0
159
160 prbnum = prb[i-1];
161 chastitynum = chastity[i-1];
162 increaseFeatureCount(qparam,dnanum,estnum,prbnum);
163
164 mparam[mlen*dnanum +estnum] ++ ;
165 }
166 else {// splice site
167 (*result_length_ptr) = (*result_length_ptr) + (j - prev_j);
168 if (((Pre_score*)matrices[act_m_no] + i*dna_len + j)->issplice==0) {
169 printf("matrices[act_m_no](i,j): %3.15f\n",((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value) ;
170 printf("matrices[prev_m_no](i,j): %3.15f\n",((Pre_score*)matrices[prev_m_no] + i*dna_len +j)->value) ;
171 printf("issplice(act): %d\n",((Pre_score*)matrices[act_m_no] + i*dna_len + j)->issplice) ;
172 printf("issplice(prev): %d\n",((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->issplice) ;
173 printf("act_m_no: %d, prev_m_no: %d\n", act_m_no, prev_m_no) ;
174 printf("j: %d -> prev_j:%d\n", j, prev_j) ;
175 printf("i: %d -> prev_i:%d\n", i, prev_i) ;
176 }
177 if (est_state==1) { //was exon labeled "1"
178 est_state = 2 ; //new exon is labeled "2"
179 }
180 else {
181 est_state = 1 ; //last exon was labeled "2", new exon is labeled "1"
182 }
183 for (j; j > prev_j; j--) {
184 if (splice_state == 0) { //coming from exon
185 splice_state = 2 ; //first intron_state: acceptor
186 } else {
187 splice_state = 3 ; //intron
188 }
189 if (j-1 == prev_j) { //last intron_state: donor
190 splice_state = 1 ;//donor
191 }
192 s_align[dna_pos-1] = splice_state;
193 dna_pos-- ;
194 }
195 splice_state = 0 ; //exon again
196
197 }
198
199 startend[1] = i;
200 startend[2] = j;
201 i = prev_i;
202 j = prev_j;
203 act_m_no = prev_m_no ;
204
205 prev_i = ((Pre_score*)matrices[act_m_no] + i*dna_len + j)->prev_i;
206 prev_j = ((Pre_score*)matrices[act_m_no] + i*dna_len + j)->prev_j;
207 prev_m_no = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_matrix_no;
208 }
209 for (dna_pos; dna_pos>0; dna_pos--) {
210 s_align[dna_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
211 }
212 for (est_pos; est_pos>0; est_pos--) {
213 e_align[est_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
214 }
215
216 //printf("(%d, %d) --> (%d, %d)\n", startend[1], startend[2], startend[3], startend[4]) ;
217 //printf("result_length: %d\n", *result_length_ptr) ;
218
219
220 return 0;
221 }
222
223