c98b36b982c10565f32e26565a2a6cc8ce514f52
[qpalma.git] / QPalmaDP / result_align.cpp
1 #include "assert.h"
2 #include "fill_matrix.h"
3
4 using namespace std;
5
6 void increaseFeatureCount(penalty_struct* qparam, int dnanum, int estnum, double estprb) {
7
8 //printf("Current index %d dna/est: %d %d\n",(estnum-1)*6+(dnanum),dnanum,estnum);
9 penalty_struct currentStruct = qparam[(estnum-1)*6+(dnanum)];
10
11 //printf("before\n");
12 //int p_idx;
13 //for(p_idx=0;p_idx<currentStruct.len;p_idx++) {
14 // printf("%f ",currentStruct.limits[p_idx]);
15 //}
16 //printf("\n");
17
18 //for(p_idx=0;p_idx<currentStruct.len;p_idx++) {
19 // printf("%f ",currentStruct.penalties[p_idx]);
20 //}
21 //printf("\n");
22
23 double value = estprb;
24 int Lower = 0;
25 int idx;
26 for (idx=0;idx<currentStruct.len;idx++) {
27 if (currentStruct.limits[idx] <= value)
28 Lower++;
29 }
30
31 if (Lower == 0) {
32 currentStruct.penalties[0] += 1;
33 return;
34 }
35
36 if (Lower == currentStruct.len) {
37 currentStruct.penalties[currentStruct.len-1] += 1;
38 return;
39 }
40
41 Lower -= 1;
42 int Upper = Lower+1; // x-werte bleiben fest
43
44 double weightup = 1.0*(value - currentStruct.limits[Lower]) / (currentStruct.limits[Upper] - currentStruct.limits[Lower]);
45 double weightlow = 1.0*(currentStruct.limits[Upper] - value) / (currentStruct.limits[Upper] - currentStruct.limits[Lower]);
46 currentStruct.penalties[Upper] += weightup;
47 currentStruct.penalties[Lower] += weightlow;
48
49 //printf("estprb/Lower/Upper %f %d %d\n",estprb,Lower,Upper);
50
51 qparam[(estnum-1)*5+(dnanum-1)] = currentStruct;
52
53 //printf("after\n");
54 //for(p_idx=0;p_idx<currentStruct.len;p_idx++) {
55 // printf("%f ",currentStruct.limits[p_idx]);
56 //}
57 //printf("\n");
58 //for(p_idx=0;p_idx<currentStruct.len;p_idx++)
59 // printf("%f ",currentStruct.penalties[p_idx]);
60 //printf("\n");
61 }
62
63 bool result_align(Pre_score* matrices[], int z, int est_len, int dna_len, int* result_length_ptr, char* est, char* dna, double* prb, double* chastity, int* s_align, int* e_align, int* mparam, double* alignmentscores, int* max_score_positions, penalty_struct* qparam, mode currentMode)
64 //Gibt 1 zurueck, wenn kein weiterer Pfad existiert, sonst 0
65
66 {
67 const int mlen=6; // length of matchmatrix
68 int startend[4] ; // [i_start, j_start, i_end, j_end] ;
69 startend[3] = max_score_positions[2*z] ; //i (est)
70 startend[4] = max_score_positions[2*z +1] ; //j (dna)
71 //printf("in result_align: z=%d:(%d, %d)\n", z, startend[3], startend[4]) ;
72 int dnanum ; //using check_char: letter -> number
73 int estnum ; //using check_char: letter -> number
74
75 double prbnum ;
76 double chastitynum ;
77
78
79 int dna_pos ;
80 int est_pos ;
81 int splice_state ;
82 int est_state ;
83 for (dna_pos=dna_len-1; dna_pos>startend[4]; dna_pos--) {
84 s_align[dna_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
85 }
86 for (est_pos=est_len-1; est_pos>startend[3]; est_pos--) {
87 e_align[est_pos-1] = 4 ;
88 }
89 //splice_align: 0: exon, 1: donor, 2: acceptor, 3: intro,. 4: dangling end
90
91
92 /* starting point for backtracking */
93 int i = startend[3] ;
94 int j = startend[4] ;
95 int act_m_no = z ;
96 alignmentscores[z] = ((Pre_score*)matrices[z] + i*dna_len +j)->value;
97 //printf("alignment_score for z=%d: %3.8f\n", z, alignmentscores[z]) ;
98
99 int prev_i = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_i;
100 int prev_j = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_j;
101 int prev_m_no = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_matrix_no;
102
103 if(!(isfinite(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value)))
104 {
105 cout << "Kein weiterer Pfad vorhanden\n";
106 return 1;
107 }
108
109 if(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value==0) {
110 cout << "Kein weiterer Pfad vorhanden\n";
111 return 1;
112 }
113
114 assert(dna_pos == j) ;
115 assert(est_pos == i) ;
116 splice_state = 0 ; //exon
117 est_state = 1 ; //[112222111 ... for exons of length 2,4,3 ...
118
119
120 //compute length of alignment (between max(m,n) and m+n)
121 //local_alignment: backtracking to first Zero: look at actual value
122 while(!(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value)==0.0) {
123
124 //just some info
125 //if ((z>0) && (!(z==prev_m_no))) {
126 // printf("(%d,%d): next step in next matrix\n", i,j) ;
127 //}
128
129 if ((prev_i == (i-1)) && (prev_j == (j-1))) { // match or mismatch
130 (*result_length_ptr)= (*result_length_ptr) + 1;
131
132 s_align[dna_pos-1] = splice_state;
133 dna_pos-- ;
134 e_align[est_pos-1] = est_state ; //1 or 2, depended
135 est_pos-- ;
136
137 dnanum = check_char(dna[j-1]) ; //-1 because index starts with 0
138 estnum = check_char(est[i-1]) ; //-1 because index starts with 0
139
140 prbnum = prb[i-1];
141 chastitynum = chastity[i-1];
142
143 if(currentMode == USE_QUALITY_SCORES)
144 increaseFeatureCount(qparam,dnanum,estnum,prbnum);
145 else
146 mparam[mlen*dnanum+estnum] ++ ;
147
148 }
149 else if ((prev_i == (i)) && (prev_j == (j-1))) {// gap on EST
150 (*result_length_ptr)= (*result_length_ptr) + 1;
151
152 s_align[dna_pos-1] = splice_state;
153 dna_pos-- ;
154
155 dnanum = check_char(dna[j-1]) ; //-1 because index starts with 0
156 estnum = 0 ; //gap
157 mparam[mlen*dnanum +estnum] ++ ;
158 }
159 else if ((prev_i == (i-1)) && (prev_j == (j))) { // gap on DNA
160 (*result_length_ptr)= (*result_length_ptr) + 1;
161
162 e_align[est_pos-1] = est_state ; //1 or 2, depended
163 est_pos-- ;
164
165 dnanum = 0 ; //gap
166 estnum = check_char(est[i-1]) ; //-1 because index starts with 0
167
168 prbnum = prb[i-1];
169 chastitynum = chastity[i-1];
170
171 if(currentMode == USE_QUALITY_SCORES)
172 increaseFeatureCount(qparam,dnanum,estnum,prbnum);
173 else
174 mparam[mlen*dnanum +estnum] ++ ;
175 }
176 else {// splice site
177 (*result_length_ptr) = (*result_length_ptr) + (j - prev_j);
178 if (((Pre_score*)matrices[act_m_no] + i*dna_len + j)->issplice==0) {
179 printf("matrices[act_m_no](i,j): %3.15f\n",((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value) ;
180 printf("matrices[prev_m_no](i,j): %3.15f\n",((Pre_score*)matrices[prev_m_no] + i*dna_len +j)->value) ;
181 printf("issplice(act): %d\n",((Pre_score*)matrices[act_m_no] + i*dna_len + j)->issplice) ;
182 printf("issplice(prev): %d\n",((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->issplice) ;
183 printf("act_m_no: %d, prev_m_no: %d\n", act_m_no, prev_m_no) ;
184 printf("j: %d -> prev_j:%d\n", j, prev_j) ;
185 printf("i: %d -> prev_i:%d\n", i, prev_i) ;
186 }
187 if (est_state==1) { //was exon labeled "1"
188 est_state = 2 ; //new exon is labeled "2"
189 }
190 else {
191 est_state = 1 ; //last exon was labeled "2", new exon is labeled "1"
192 }
193 for (j; j > prev_j; j--) {
194 if (splice_state == 0) { //coming from exon
195 splice_state = 2 ; //first intron_state: acceptor
196 } else {
197 splice_state = 3 ; //intron
198 }
199 if (j-1 == prev_j) { //last intron_state: donor
200 splice_state = 1 ;//donor
201 }
202 s_align[dna_pos-1] = splice_state;
203 dna_pos-- ;
204 }
205 splice_state = 0 ; //exon again
206
207 }
208
209 startend[1] = i;
210 startend[2] = j;
211 i = prev_i;
212 j = prev_j;
213 act_m_no = prev_m_no ;
214
215 prev_i = ((Pre_score*)matrices[act_m_no] + i*dna_len + j)->prev_i;
216 prev_j = ((Pre_score*)matrices[act_m_no] + i*dna_len + j)->prev_j;
217 prev_m_no = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_matrix_no;
218 }
219 for (dna_pos; dna_pos>0; dna_pos--) {
220 s_align[dna_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
221 }
222 for (est_pos; est_pos>0; est_pos--) {
223 e_align[est_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
224 }
225
226 //printf("(%d, %d) --> (%d, %d)\n", startend[1], startend[2], startend[3], startend[4]) ;
227 //printf("result_length: %d\n", *result_length_ptr) ;
228
229
230 return 0;
231 }