+ disabled don/acc scores properly
[qpalma.git] / QPalmaDP / result_align.cpp
1 #include "assert.h"
2 #include "fill_matrix.h"
3
4 using namespace std;
5
6 void increaseFeatureCount(penalty_struct* qparam, int dnanum, int estnum, double estprb) {
7
8 if(dnanum == 0) {
9 //printf("Zero entry\n");
10 return;
11 }
12
13 if(estnum == 0) { // we have a gap
14 //printf("Zero entry\n");
15 return;
16 }
17
18 penalty_struct currentStruct = qparam[(estnum-1)*5+(dnanum-1)];
19 printf("Current index %d\n",(estnum-1)*5+(dnanum-1));
20
21 printf("before\n");
22 int p_idx;
23 for(p_idx=0;p_idx<currentStruct.len;p_idx++) {
24 printf("%f ",currentStruct.limits[p_idx]);
25 }
26 printf("\n");
27
28 for(p_idx=0;p_idx<currentStruct.len;p_idx++) {
29 printf("%f ",currentStruct.penalties[p_idx]);
30 }
31 printf("\n");
32
33 double value = estprb;
34 int Lower = 0;
35 int idx;
36 for (idx=0;idx<currentStruct.len;idx++) {
37 if (currentStruct.limits[idx] <= value)
38 Lower++;
39 }
40
41 if (Lower == 0)
42 currentStruct.penalties[0] += 1;
43
44 if (Lower == currentStruct.len)
45 currentStruct.penalties[currentStruct.len-1] += 1;
46
47 Lower -= 1;
48 int Upper = Lower+1; // x-werte bleiben fest
49
50 double weightup = 1.0*(value - currentStruct.limits[Lower]) / (currentStruct.limits[Upper] - currentStruct.limits[Lower]);
51 double weightlow = 1.0*(currentStruct.limits[Upper] - value) / (currentStruct.limits[Upper] - currentStruct.limits[Lower]);
52 currentStruct.penalties[Upper] += weightup;
53 currentStruct.penalties[Lower] += weightlow;
54
55 //printf("estprb/Lower/Upper %f %d %d\n",estprb,Lower,Upper);
56
57 qparam[(estnum-1)*5+(dnanum-1)] = currentStruct;
58
59 printf("after\n");
60 for(p_idx=0;p_idx<currentStruct.len;p_idx++) {
61 printf("%f ",currentStruct.limits[p_idx]);
62 }
63 printf("\n");
64 for(p_idx=0;p_idx<currentStruct.len;p_idx++)
65 printf("%f ",currentStruct.penalties[p_idx]);
66 printf("\n");
67 }
68
69 bool result_align(Pre_score* matrices[], int z, int est_len, int dna_len, int* result_length_ptr, char* est, char* dna, double* prb, double* chastity, int* s_align, int* e_align, int* mparam, double* alignmentscores, int* max_score_positions, penalty_struct* qparam)
70 //Gibt 1 zurueck, wenn kein weiterer Pfad existiert, sonst 0
71
72 {
73 const int mlen=6; // length of matchmatrix
74 int startend[4] ; // [i_start, j_start, i_end, j_end] ;
75 startend[3] = max_score_positions[2*z] ; //i (est)
76 startend[4] = max_score_positions[2*z +1] ; //j (dna)
77 //printf("in result_align: z=%d:(%d, %d)\n", z, startend[3], startend[4]) ;
78 int dnanum ; //using check_char: letter -> number
79 int estnum ; //using check_char: letter -> number
80
81 double prbnum ;
82 double chastitynum ;
83
84 int dna_pos ;
85 int est_pos ;
86 int splice_state ;
87 int est_state ;
88 for (dna_pos=dna_len-1; dna_pos>startend[4]; dna_pos--) {
89 s_align[dna_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
90 }
91 for (est_pos=est_len-1; est_pos>startend[3]; est_pos--) {
92 e_align[est_pos-1] = 4 ;
93 }
94 //splice_align: 0: exon, 1: donor, 2: acceptor, 3: intro,. 4: dangling end
95
96
97 /* starting point for backtracking */
98 int i = startend[3] ;
99 int j = startend[4] ;
100 int act_m_no = z ;
101 alignmentscores[z] = ((Pre_score*)matrices[z] + i*dna_len +j)->value;
102 //printf("alignment_score for z=%d: %3.8f\n", z, alignmentscores[z]) ;
103
104 int prev_i = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_i;
105 int prev_j = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_j;
106 int prev_m_no = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_matrix_no;
107
108 if(!(isfinite(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value)))
109 {
110 cout << "Kein weiterer Pfad vorhanden\n";
111 return 1;
112 }
113
114 if(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value==0) {
115 cout << "Kein weiterer Pfad vorhanden\n";
116 return 1;
117 }
118
119 assert(dna_pos == j) ;
120 assert(est_pos == i) ;
121 splice_state = 0 ; //exon
122 est_state = 1 ; //[112222111 ... for exons of length 2,4,3 ...
123
124 //compute length of alignment (between max(m,n) and m+n)
125 //local_alignment: backtracking to first Zero: look at actual value
126 while(!(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value)==0.0) {
127
128 //just some info
129 //if ((z>0) && (!(z==prev_m_no))) {
130 // printf("(%d,%d): next step in next matrix\n", i,j) ;
131 //}
132
133 if ((prev_i == (i-1)) && (prev_j == (j-1))) { // match or mismatch
134 (*result_length_ptr)= (*result_length_ptr) + 1;
135
136 s_align[dna_pos-1] = splice_state;
137 dna_pos-- ;
138 e_align[est_pos-1] = est_state ; //1 or 2, depended
139 est_pos-- ;
140
141 dnanum = check_char(dna[j-1]) ; //-1 because index starts with 0
142 estnum = check_char(est[i-1]) ; //-1 because index starts with 0
143
144 prbnum = prb[i-1];
145 chastitynum = chastity[i-1];
146 increaseFeatureCount(qparam,dnanum,estnum,prbnum);
147
148 mparam[mlen*dnanum +estnum] ++ ;
149 }
150 else if ((prev_i == (i)) && (prev_j == (j-1))) {// gap on EST
151 (*result_length_ptr)= (*result_length_ptr) + 1;
152
153 s_align[dna_pos-1] = splice_state;
154 dna_pos-- ;
155
156 dnanum = check_char(dna[j-1]) ; //-1 because index starts with 0
157 estnum = 0 ; //gap
158 mparam[mlen*dnanum +estnum] ++ ;
159 }
160 else if ((prev_i == (i-1)) && (prev_j == (j))) { // gap on DNA
161 (*result_length_ptr)= (*result_length_ptr) + 1;
162
163 e_align[est_pos-1] = est_state ; //1 or 2, depended
164 est_pos-- ;
165
166 dnanum = 0 ; //gap
167 estnum = check_char(est[i-1]) ; //-1 because index starts with 0
168
169 prbnum = prb[i-1];
170 chastitynum = chastity[i-1];
171 increaseFeatureCount(qparam,dnanum,estnum,prbnum);
172
173 mparam[mlen*dnanum +estnum] ++ ;
174 }
175 else {// splice site
176 (*result_length_ptr) = (*result_length_ptr) + (j - prev_j);
177 if (((Pre_score*)matrices[act_m_no] + i*dna_len + j)->issplice==0) {
178 printf("matrices[act_m_no](i,j): %3.15f\n",((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value) ;
179 printf("matrices[prev_m_no](i,j): %3.15f\n",((Pre_score*)matrices[prev_m_no] + i*dna_len +j)->value) ;
180 printf("issplice(act): %d\n",((Pre_score*)matrices[act_m_no] + i*dna_len + j)->issplice) ;
181 printf("issplice(prev): %d\n",((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->issplice) ;
182 printf("act_m_no: %d, prev_m_no: %d\n", act_m_no, prev_m_no) ;
183 printf("j: %d -> prev_j:%d\n", j, prev_j) ;
184 printf("i: %d -> prev_i:%d\n", i, prev_i) ;
185 }
186 if (est_state==1) { //was exon labeled "1"
187 est_state = 2 ; //new exon is labeled "2"
188 }
189 else {
190 est_state = 1 ; //last exon was labeled "2", new exon is labeled "1"
191 }
192 for (j; j > prev_j; j--) {
193 if (splice_state == 0) { //coming from exon
194 splice_state = 2 ; //first intron_state: acceptor
195 } else {
196 splice_state = 3 ; //intron
197 }
198 if (j-1 == prev_j) { //last intron_state: donor
199 splice_state = 1 ;//donor
200 }
201 s_align[dna_pos-1] = splice_state;
202 dna_pos-- ;
203 }
204 splice_state = 0 ; //exon again
205
206 }
207
208 startend[1] = i;
209 startend[2] = j;
210 i = prev_i;
211 j = prev_j;
212 act_m_no = prev_m_no ;
213
214 prev_i = ((Pre_score*)matrices[act_m_no] + i*dna_len + j)->prev_i;
215 prev_j = ((Pre_score*)matrices[act_m_no] + i*dna_len + j)->prev_j;
216 prev_m_no = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_matrix_no;
217 }
218 for (dna_pos; dna_pos>0; dna_pos--) {
219 s_align[dna_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
220 }
221 for (est_pos; est_pos>0; est_pos--) {
222 e_align[est_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
223 }
224
225 //printf("(%d, %d) --> (%d, %d)\n", startend[1], startend[2], startend[3], startend[4]) ;
226 //printf("result_length: %d\n", *result_length_ptr) ;
227
228
229 return 0;
230 }
231
232