20dd4ebf7ce913d2b2aa5ff6775775353c98074d
[qpalma.git] / QPalmaDP / result_align.cpp
1 #include "assert.h"
2 #include "fill_matrix.h"
3
4 using namespace std;
5
6 void increaseFeatureCount(penalty_struct* qparam, int dnanum, int estnum, double estprb) {
7
8 penalty_struct currentStruct = qparam[(estnum-1)*6+(dnanum)];
9 printf("Current index %d dna/est: %d %d\n",(estnum-1)*6+(dnanum),dnanum,estnum);
10
11 printf("before\n");
12 int p_idx;
13 for(p_idx=0;p_idx<currentStruct.len;p_idx++) {
14 printf("%f ",currentStruct.limits[p_idx]);
15 }
16 printf("\n");
17
18 for(p_idx=0;p_idx<currentStruct.len;p_idx++) {
19 printf("%f ",currentStruct.penalties[p_idx]);
20 }
21 printf("\n");
22
23 double value = estprb;
24 int Lower = 0;
25 int idx;
26 for (idx=0;idx<currentStruct.len;idx++) {
27 if (currentStruct.limits[idx] <= value)
28 Lower++;
29 }
30
31 if (Lower == 0)
32 currentStruct.penalties[0] += 1;
33
34 if (Lower == currentStruct.len)
35 currentStruct.penalties[currentStruct.len-1] += 1;
36
37 Lower -= 1;
38 int Upper = Lower+1; // x-werte bleiben fest
39
40 double weightup = 1.0*(value - currentStruct.limits[Lower]) / (currentStruct.limits[Upper] - currentStruct.limits[Lower]);
41 double weightlow = 1.0*(currentStruct.limits[Upper] - value) / (currentStruct.limits[Upper] - currentStruct.limits[Lower]);
42 currentStruct.penalties[Upper] += weightup;
43 currentStruct.penalties[Lower] += weightlow;
44
45 //printf("estprb/Lower/Upper %f %d %d\n",estprb,Lower,Upper);
46
47 qparam[(estnum-1)*5+(dnanum-1)] = currentStruct;
48
49 printf("after\n");
50 for(p_idx=0;p_idx<currentStruct.len;p_idx++) {
51 printf("%f ",currentStruct.limits[p_idx]);
52 }
53 printf("\n");
54 for(p_idx=0;p_idx<currentStruct.len;p_idx++)
55 printf("%f ",currentStruct.penalties[p_idx]);
56 printf("\n");
57 }
58
59 bool result_align(Pre_score* matrices[], int z, int est_len, int dna_len, int* result_length_ptr, char* est, char* dna, double* prb, double* chastity, int* s_align, int* e_align, int* mparam, double* alignmentscores, int* max_score_positions, penalty_struct* qparam, mode currentMode)
60 //Gibt 1 zurueck, wenn kein weiterer Pfad existiert, sonst 0
61
62 {
63 const int mlen=6; // length of matchmatrix
64 int startend[4] ; // [i_start, j_start, i_end, j_end] ;
65 startend[3] = max_score_positions[2*z] ; //i (est)
66 startend[4] = max_score_positions[2*z +1] ; //j (dna)
67 //printf("in result_align: z=%d:(%d, %d)\n", z, startend[3], startend[4]) ;
68 int dnanum ; //using check_char: letter -> number
69 int estnum ; //using check_char: letter -> number
70
71 double prbnum ;
72 double chastitynum ;
73
74 int dna_pos ;
75 int est_pos ;
76 int splice_state ;
77 int est_state ;
78 for (dna_pos=dna_len-1; dna_pos>startend[4]; dna_pos--) {
79 s_align[dna_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
80 }
81 for (est_pos=est_len-1; est_pos>startend[3]; est_pos--) {
82 e_align[est_pos-1] = 4 ;
83 }
84 //splice_align: 0: exon, 1: donor, 2: acceptor, 3: intro,. 4: dangling end
85
86
87 /* starting point for backtracking */
88 int i = startend[3] ;
89 int j = startend[4] ;
90 int act_m_no = z ;
91 alignmentscores[z] = ((Pre_score*)matrices[z] + i*dna_len +j)->value;
92 //printf("alignment_score for z=%d: %3.8f\n", z, alignmentscores[z]) ;
93
94 int prev_i = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_i;
95 int prev_j = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_j;
96 int prev_m_no = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_matrix_no;
97
98 if(!(isfinite(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value)))
99 {
100 cout << "Kein weiterer Pfad vorhanden\n";
101 return 1;
102 }
103
104 if(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value==0) {
105 cout << "Kein weiterer Pfad vorhanden\n";
106 return 1;
107 }
108
109 assert(dna_pos == j) ;
110 assert(est_pos == i) ;
111 splice_state = 0 ; //exon
112 est_state = 1 ; //[112222111 ... for exons of length 2,4,3 ...
113
114 //compute length of alignment (between max(m,n) and m+n)
115 //local_alignment: backtracking to first Zero: look at actual value
116 while(!(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value)==0.0) {
117
118 //just some info
119 //if ((z>0) && (!(z==prev_m_no))) {
120 // printf("(%d,%d): next step in next matrix\n", i,j) ;
121 //}
122
123 if ((prev_i == (i-1)) && (prev_j == (j-1))) { // match or mismatch
124 (*result_length_ptr)= (*result_length_ptr) + 1;
125
126 s_align[dna_pos-1] = splice_state;
127 dna_pos-- ;
128 e_align[est_pos-1] = est_state ; //1 or 2, depended
129 est_pos-- ;
130
131 dnanum = check_char(dna[j-1]) ; //-1 because index starts with 0
132 estnum = check_char(est[i-1]) ; //-1 because index starts with 0
133
134 prbnum = prb[i-1];
135 chastitynum = chastity[i-1];
136
137 if(currentMode == USE_QUALITY_SCORES)
138 increaseFeatureCount(qparam,dnanum,estnum,prbnum);
139 else
140 mparam[mlen*dnanum+estnum] ++ ;
141
142 }
143 else if ((prev_i == (i)) && (prev_j == (j-1))) {// gap on EST
144 (*result_length_ptr)= (*result_length_ptr) + 1;
145
146 s_align[dna_pos-1] = splice_state;
147 dna_pos-- ;
148
149 dnanum = check_char(dna[j-1]) ; //-1 because index starts with 0
150 estnum = 0 ; //gap
151 mparam[mlen*dnanum +estnum] ++ ;
152 }
153 else if ((prev_i == (i-1)) && (prev_j == (j))) { // gap on DNA
154 (*result_length_ptr)= (*result_length_ptr) + 1;
155
156 e_align[est_pos-1] = est_state ; //1 or 2, depended
157 est_pos-- ;
158
159 dnanum = 0 ; //gap
160 estnum = check_char(est[i-1]) ; //-1 because index starts with 0
161
162 prbnum = prb[i-1];
163 chastitynum = chastity[i-1];
164
165 if(currentMode == USE_QUALITY_SCORES)
166 increaseFeatureCount(qparam,dnanum,estnum,prbnum);
167 else
168 mparam[mlen*dnanum +estnum] ++ ;
169 }
170 else {// splice site
171 (*result_length_ptr) = (*result_length_ptr) + (j - prev_j);
172 if (((Pre_score*)matrices[act_m_no] + i*dna_len + j)->issplice==0) {
173 printf("matrices[act_m_no](i,j): %3.15f\n",((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value) ;
174 printf("matrices[prev_m_no](i,j): %3.15f\n",((Pre_score*)matrices[prev_m_no] + i*dna_len +j)->value) ;
175 printf("issplice(act): %d\n",((Pre_score*)matrices[act_m_no] + i*dna_len + j)->issplice) ;
176 printf("issplice(prev): %d\n",((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->issplice) ;
177 printf("act_m_no: %d, prev_m_no: %d\n", act_m_no, prev_m_no) ;
178 printf("j: %d -> prev_j:%d\n", j, prev_j) ;
179 printf("i: %d -> prev_i:%d\n", i, prev_i) ;
180 }
181 if (est_state==1) { //was exon labeled "1"
182 est_state = 2 ; //new exon is labeled "2"
183 }
184 else {
185 est_state = 1 ; //last exon was labeled "2", new exon is labeled "1"
186 }
187 for (j; j > prev_j; j--) {
188 if (splice_state == 0) { //coming from exon
189 splice_state = 2 ; //first intron_state: acceptor
190 } else {
191 splice_state = 3 ; //intron
192 }
193 if (j-1 == prev_j) { //last intron_state: donor
194 splice_state = 1 ;//donor
195 }
196 s_align[dna_pos-1] = splice_state;
197 dna_pos-- ;
198 }
199 splice_state = 0 ; //exon again
200
201 }
202
203 startend[1] = i;
204 startend[2] = j;
205 i = prev_i;
206 j = prev_j;
207 act_m_no = prev_m_no ;
208
209 prev_i = ((Pre_score*)matrices[act_m_no] + i*dna_len + j)->prev_i;
210 prev_j = ((Pre_score*)matrices[act_m_no] + i*dna_len + j)->prev_j;
211 prev_m_no = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_matrix_no;
212 }
213 for (dna_pos; dna_pos>0; dna_pos--) {
214 s_align[dna_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
215 }
216 for (est_pos; est_pos>0; est_pos--) {
217 e_align[est_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
218 }
219
220 //printf("(%d, %d) --> (%d, %d)\n", startend[1], startend[2], startend[3], startend[4]) ;
221 //printf("result_length: %d\n", *result_length_ptr) ;
222
223
224 return 0;
225 }