+ added part of the feature calculation for the quality score values
[qpalma.git] / QPalmaDP / result_align.cpp
1 #include "assert.h"
2 #include "fill_matrix.h"
3
4 using namespace std;
5
6 void increaseFeatureCount(penalty_struct* qparam, int dnanum, int estnum, double estprb) {
7
8 penalty_struct currentStruct = qparam[estnum*6+dnanum];
9
10 double value = estprb;
11 int Lower = 0;
12 int idx;
13 for (idx=0;idx<currentStruct.len;idx++) {
14 if (currentStruct.limits[idx] <= value)
15 Lower++;
16 }
17
18 if (Lower == 0)
19 currentStruct.penalties[0] += 1;
20
21 if (Lower == currentStruct.len)
22 currentStruct.penalties[currentStruct.len-1] += 1;
23
24 Lower -= 1;
25 int Upper = Lower+1; // x-werte bleiben fest
26 double weightup = 1.0*(value - currentStruct.limits[Lower]) / (currentStruct.limits[Upper] - currentStruct.limits[Lower]);
27 double weightlow = 1.0*(currentStruct.limits[Upper] - value) / (currentStruct.limits[Upper] - currentStruct.limits[Lower]);
28 currentStruct.penalties[Upper] = currentStruct.penalties[Upper] + weightup;
29 currentStruct.penalties[Lower] = currentStruct.penalties[Lower] + weightlow;
30 }
31
32 bool result_align(Pre_score* matrices[], int z, int est_len, int dna_len, int* result_length_ptr, char* est, char* dna, double* prb, double* chastity, int* s_align, int* e_align, int* mparam, double* alignmentscores, int* max_score_positions, penalty_struct* qparam)
33 //Gibt 1 zurueck, wenn kein weiterer Pfad existiert, sonst 0
34
35 {
36 const int mlen=6; // length of matchmatrix
37 int startend[4] ; // [i_start, j_start, i_end, j_end] ;
38 startend[3] = max_score_positions[2*z] ; //i (est)
39 startend[4] = max_score_positions[2*z +1] ; //j (dna)
40 //printf("in result_align: z=%d:(%d, %d)\n", z, startend[3], startend[4]) ;
41 int dnanum ; //using check_char: letter -> number
42 int estnum ; //using check_char: letter -> number
43
44 double prbnum ;
45 double chastitynum ;
46
47 int dna_pos ;
48 int est_pos ;
49 int splice_state ;
50 int est_state ;
51 for (dna_pos=dna_len-1; dna_pos>startend[4]; dna_pos--) {
52 s_align[dna_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
53 }
54 for (est_pos=est_len-1; est_pos>startend[3]; est_pos--) {
55 e_align[est_pos-1] = 4 ;
56 }
57 //splice_align: 0: exon, 1: donor, 2: acceptor, 3: intro,. 4: dangling end
58
59
60 /* starting point for backtracking */
61 int i = startend[3] ;
62 int j = startend[4] ;
63 int act_m_no = z ;
64 alignmentscores[z] = ((Pre_score*)matrices[z] + i*dna_len +j)->value;
65 //printf("alignment_score for z=%d: %3.8f\n", z, alignmentscores[z]) ;
66
67 int prev_i = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_i;
68 int prev_j = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_j;
69 int prev_m_no = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_matrix_no;
70
71 if(!(isfinite(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value)))
72 {
73 cout << "Kein weiterer Pfad vorhanden\n";
74 return 1;
75 }
76
77 if(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value==0) {
78 cout << "Kein weiterer Pfad vorhanden\n";
79 return 1;
80 }
81
82 assert(dna_pos == j) ;
83 assert(est_pos == i) ;
84 splice_state = 0 ; //exon
85 est_state = 1 ; //[112222111 ... for exons of length 2,4,3 ...
86
87 //compute length of alignment (between max(m,n) and m+n)
88 //local_alignment: backtracking to first Zero: look at actual value
89 while(!(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value)==0.0) {
90
91 //just some info
92 //if ((z>0) && (!(z==prev_m_no))) {
93 // printf("(%d,%d): next step in next matrix\n", i,j) ;
94 //}
95
96 if ((prev_i == (i-1)) && (prev_j == (j-1))) { // match or mismatch
97 (*result_length_ptr)= (*result_length_ptr) + 1;
98
99 s_align[dna_pos-1] = splice_state;
100 dna_pos-- ;
101 e_align[est_pos-1] = est_state ; //1 or 2, depended
102 est_pos-- ;
103
104 dnanum = check_char(dna[j-1]) ; //-1 because index starts with 0
105 estnum = check_char(est[i-1]) ; //-1 because index starts with 0
106
107 prbnum = prb[i-1];
108 chastitynum = chastity[i-1];
109 //increaseFeatureCount(qparam,dnanum,estnum,prbnum);
110
111 mparam[mlen*dnanum +estnum] ++ ;
112 }
113 else if ((prev_i == (i)) && (prev_j == (j-1))) {// gap on EST
114 (*result_length_ptr)= (*result_length_ptr) + 1;
115
116 s_align[dna_pos-1] = splice_state;
117 dna_pos-- ;
118
119 dnanum = check_char(dna[j-1]) ; //-1 because index starts with 0
120 estnum = 0 ; //gap
121 mparam[mlen*dnanum +estnum] ++ ;
122 }
123 else if ((prev_i == (i-1)) && (prev_j == (j))) { // gap on DNA
124 (*result_length_ptr)= (*result_length_ptr) + 1;
125
126 e_align[est_pos-1] = est_state ; //1 or 2, depended
127 est_pos-- ;
128
129 dnanum = 0 ; //gap
130 estnum = check_char(est[i-1]) ; //-1 because index starts with 0
131
132 prbnum = prb[i-1];
133 chastitynum = chastity[i-1];
134 //increaseFeatureCount(qparam,dnanum,estnum,prbnum);
135
136 mparam[mlen*dnanum +estnum] ++ ;
137 }
138 else {// splice site
139 (*result_length_ptr) = (*result_length_ptr) + (j - prev_j);
140 if (((Pre_score*)matrices[act_m_no] + i*dna_len + j)->issplice==0) {
141 printf("matrices[act_m_no](i,j): %3.15f\n",((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value) ;
142 printf("matrices[prev_m_no](i,j): %3.15f\n",((Pre_score*)matrices[prev_m_no] + i*dna_len +j)->value) ;
143 printf("issplice(act): %d\n",((Pre_score*)matrices[act_m_no] + i*dna_len + j)->issplice) ;
144 printf("issplice(prev): %d\n",((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->issplice) ;
145 printf("act_m_no: %d, prev_m_no: %d\n", act_m_no, prev_m_no) ;
146 printf("j: %d -> prev_j:%d\n", j, prev_j) ;
147 printf("i: %d -> prev_i:%d\n", i, prev_i) ;
148 }
149 if (est_state==1) { //was exon labeled "1"
150 est_state = 2 ; //new exon is labeled "2"
151 }
152 else {
153 est_state = 1 ; //last exon was labeled "2", new exon is labeled "1"
154 }
155 for (j; j > prev_j; j--) {
156 if (splice_state == 0) { //coming from exon
157 splice_state = 2 ; //first intron_state: acceptor
158 } else {
159 splice_state = 3 ; //intron
160 }
161 if (j-1 == prev_j) { //last intron_state: donor
162 splice_state = 1 ;//donor
163 }
164 s_align[dna_pos-1] = splice_state;
165 dna_pos-- ;
166 }
167 splice_state = 0 ; //exon again
168
169 }
170
171 startend[1] = i;
172 startend[2] = j;
173 i = prev_i;
174 j = prev_j;
175 act_m_no = prev_m_no ;
176
177 prev_i = ((Pre_score*)matrices[act_m_no] + i*dna_len + j)->prev_i;
178 prev_j = ((Pre_score*)matrices[act_m_no] + i*dna_len + j)->prev_j;
179 prev_m_no = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_matrix_no;
180 }
181 for (dna_pos; dna_pos>0; dna_pos--) {
182 s_align[dna_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
183 }
184 for (est_pos; est_pos>0; est_pos--) {
185 e_align[est_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
186 }
187
188 //printf("(%d, %d) --> (%d, %d)\n", startend[1], startend[2], startend[3], startend[4]) ;
189 //printf("result_length: %d\n", *result_length_ptr) ;
190
191
192 return 0;
193 }
194
195