+ rearranged code
[qpalma.git] / dyn_prog / qpalma_dp.cpp
1 #include "qpalma_dp.h"
2 #include <cstring>
3
4 using namespace std;
5
6 /*[splice_align, est_align, weightMatch, alignmentscores, dnaest] = ...
7 myalign([nr_paths], dna, est, {h}, matchmatrix, donor, acceptor, remove_duplicate_scores, ...
8 print_matrix) */
9
10 Alignment::Alignment(int numQPlifs, int numq, bool use_qscores) {
11 len = 0;
12 limits = 0;
13 penalties = 0;
14 max_len = 0;
15 min_len = 0;
16 cache = 0;
17 enum ETransformType transform = T_LINEAR;
18 id = 0;
19 name = 0;
20 use_svm = 0;
21
22 // set ptrs to zero first
23 splice_align = 0;
24 est_align = 0;
25 mmatrix_param = 0;
26 alignmentscores = 0;
27 qualityScoresAllPaths = 0;
28 mlen = 6; // score matrix: length of 6 for "- A C G T N"
29
30 numQualSuppPoints = numq;
31 numPlifs = numQPlifs;
32 use_quality_scores = use_qscores;
33
34 //printf("number of support points: %d\n",numQualSuppPoints);
35 //printf("number of plifs: %d\n",numPlifs );
36 FA( numQualSuppPoints >= 0 );
37 FA( numPlifs >= 0 );
38 }
39
40 void Alignment::getDNAEST(){}
41
42 void Alignment::myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
43 int est_len_p, double* prb, double* chastity, struct penalty_struct h, double* matchmatrix, int mm_len,
44 double* donor, int d_len, double* acceptor, int a_len, struct penalty_struct* qualityScores,
45 bool remove_duplicate_scores, bool print_matrix) {
46
47 // printf("Entering myalign...\n");
48 dna_len = dna_len_p + 1 ;
49 est_len = est_len_p + 1 ;
50 nr_paths = nr_paths_p;
51
52 /***************************************************************************/
53 // initialize alignment matrices and call fill_matrix()
54 /***************************************************************************/
55
56 mode currentMode;
57 if (use_quality_scores)
58 currentMode = USE_QUALITY_SCORES;
59 else
60 currentMode = NORMAL;
61
62 // dnaest
63 DNA_ARRAY = 0;
64 EST_ARRAY = 0;
65
66 //possible donor positions
67 int nr_donor_sites = 0 ;
68 for (int ii=0; ii<d_len; ii++) {
69 if(isfinite(donor[ii])) {
70 nr_donor_sites++ ;
71 }
72 }
73
74 int* donor_sites = new int[nr_donor_sites];
75 int donor_idx = 0 ;
76 for (int ii=0; ii<d_len; ii++) {
77 if(isfinite(donor[ii])) {
78 donor_sites[donor_idx] = ii+1 ;
79 donor_idx++ ;
80 }
81 }
82
83 int* max_score_positions = new int[nr_paths*2];
84
85 Pre_score** matrices = new Pre_score*[nr_paths];
86 for (int z=0; z<nr_paths; z++) {
87 matrices[z] = new Pre_score[dna_len * est_len];
88 }
89
90 //printf("qualityScores\n");
91 //for(int qidx=0;qidx<numPlifs;qidx++) {
92 // penalty_struct p;
93 // p = qualityScores[qidx];
94 // printf("%d ",qidx);
95 // for(int pidx=0;pidx<p.len;pidx++)
96 // printf("%f ",pidx,p.penalties[pidx]);
97 // printf("\n");
98 //}
99 //
100
101 //int h_idx;
102 //for(h_idx=0;h_idx<h.len;h_idx++)
103 // printf("%d %f %f\n",h_idx,h.limits[h_idx],h.penalties[h_idx]);
104
105 //printf("calling fill_matrix...\n");
106 fill_matrix(nr_paths, matrices, est_len, dna_len, est, dna, prb, &h, matchmatrix, qualityScores, donor, acceptor, remove_duplicate_scores, nr_donor_sites, donor_sites, max_score_positions,currentMode);
107 //printf("after call to fill_matrix...\n");
108
109 /***************************************************************************/
110 // return arguments etc.
111 /***************************************************************************/
112 int result_length; //Eine Variable fuer alle Matrizen
113
114 splice_align_size = (dna_len-1)*nr_paths;
115 est_align_size = (est_len-1)*nr_paths;
116
117 int mmatrix_size;
118
119 if (currentMode == USE_QUALITY_SCORES) {
120 mmatrix_param_size = mlen*nr_paths;
121 mmatrix_size = mlen;
122 }
123
124 if (currentMode == NORMAL) {
125 mmatrix_param_size = (mlen*mlen)*nr_paths;
126 mmatrix_size = mlen*mlen;
127 }
128
129 alignmentscores_size = nr_paths; //alignment score for each path/matrix
130 numPathsPlifs = numPlifs*nr_paths; //alignment score for each path/matrix
131
132 splice_align = new int[splice_align_size];
133 est_align = new int[est_align_size];
134 mmatrix_param = new int[mmatrix_param_size];
135 alignmentscores = new double[nr_paths]; //alignment score for each path/matrix
136
137
138 // printf("before memset...\n");
139 memset((char*)splice_align, -1, (dna_len-1)*nr_paths*sizeof(int)); // fills splice_align with zeros
140 memset((char*)est_align, -1, (est_len-1)*nr_paths*sizeof(int)); // fills est_align with zeros
141 memset((char*)mmatrix_param, 0, mmatrix_size*nr_paths*sizeof(int)); //fills mmatrix_param with zeros
142 memset(alignmentscores, -1, nr_paths*sizeof(double)); //fills alignmentscores with zeros
143 //printf("after memset...\n");
144
145 qualityScoresAllPaths= new penalty_struct*[nr_paths];
146
147 for (int z=0; z<nr_paths; z++) {
148 result_length = 0 ;
149
150 int* s_align = splice_align + (dna_len-1)*z; //pointer
151 int* e_align = est_align + (est_len-1)*z ; //pointer
152 int* mparam = mmatrix_param + mmatrix_size*z; //pointer
153
154 qualityScoresAllPaths[z] = new penalty_struct[numPlifs];
155
156 int qidx, pidx;
157 for(qidx=0;qidx<numPlifs;qidx++) {
158 penalty_struct p;
159 p.len = numQualSuppPoints;
160 p.limits = (REAL*) calloc(p.len,sizeof(REAL));
161
162 for(pidx=0;pidx<p.len;pidx++)
163 p.limits[pidx] = qualityScores[qidx%numPlifs].limits[pidx];
164
165 p.penalties = (REAL*) calloc(p.len,sizeof(REAL));
166 qualityScoresAllPaths[z][qidx] = p;
167 }
168
169 //penalty_struct* qparam = qualityScoresAllPaths + (numPlifs*z);
170
171 //printf("before call to result_align...\n");
172 bool no_more_path = result_align(matrices, z, est_len, dna_len, &result_length, est, dna, prb, chastity, s_align, e_align, mparam, alignmentscores, max_score_positions, qualityScoresAllPaths[z] , currentMode);
173 //printf("after call to result_align...\n");
174
175 //printf("z is %d\n",z);
176 //for(qidx=0;qidx<numPlifs;qidx++) {
177 // penalty_struct p;
178 // p = qualityScoresAllPaths[qidx+numPlifs*z];
179 // printf("%d ",qidx);
180 // for(pidx=0;pidx<p.len;pidx++)
181 // printf("%f ",pidx,p.penalties[pidx]);
182 // printf("\n");
183 //}
184 //
185 if(z==0) {
186
187 if(DNA_ARRAY != 0) {
188 delete[] DNA_ARRAY;
189 delete[] EST_ARRAY;
190 }
191
192 result_len = result_length;
193
194 DNA_ARRAY = new int[result_length];
195 EST_ARRAY = new int[result_length];
196
197 //backtracking
198 int i = max_score_positions[2*z] ; //i (est)
199 int j = max_score_positions[2*z +1] ; //j (dna)
200 int prev_i = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_i;
201 int prev_j = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_j;
202 int prev_m_no = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_matrix_no;
203
204 for (int ii=result_length; ii>0; ii--) {
205 if ((prev_i == (i-1)) && (prev_j == (j-1))) { // match or mismatch
206 DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
207 EST_ARRAY[ii-1] = check_char(est[i-1]) ;
208 }
209 else if ((prev_i == (i)) && (prev_j == (j-1))) {// gap on EST_ARRAY
210 DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
211 EST_ARRAY[ii-1] = 0 ; //gap
212 }
213 else if ((prev_i == (i-1)) && (prev_j == (j))) { // gap on DNA_ARRAY
214 DNA_ARRAY[ii-1] = 0 ; //gap
215 EST_ARRAY[ii-1] = check_char(est[i-1]) ;
216 }
217 else {// splice site
218 for (j; j > prev_j; j--) {
219 DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
220 EST_ARRAY[ii-1] = 6 ; //intron
221 ii-- ;
222 }
223 ii++ ; // last ii-- too much (because done in for-loop)
224 }
225
226 i = prev_i;
227 j = prev_j;
228 prev_i = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_i;
229 prev_j = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_j;
230 prev_m_no = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_matrix_no;
231 }
232
233 } // end of "if z == 0"
234
235 } //end of z
236
237 for (int i=nr_paths-1;i>=0; i--)
238 delete[] matrices[i];
239 }
240
241 void Alignment::getAlignmentResults(int* s_align, int* e_align,
242 int* mmatrix_p, double* alignscores, double* qScores) {
243
244 int idx;
245 for(idx=0; idx<splice_align_size; idx++)
246 s_align[idx] = splice_align[idx];
247
248 for(idx=0; idx<est_align_size; idx++)
249 e_align[idx] = est_align[idx];
250
251 for(idx=0; idx<mmatrix_param_size; idx++)
252 mmatrix_p[idx] = mmatrix_param[idx];
253
254 for(idx=0; idx<alignmentscores_size; idx++)
255 alignscores[idx] = alignmentscores[idx];
256
257 if (use_quality_scores) {
258 penalty_struct currentPlif;
259 int ctr=0;
260 for (int z=0; z<nr_paths; z++) {
261 for(int estChar=1;estChar<6;estChar++) {
262 for(int dnaChar=0;dnaChar<6;dnaChar++) {
263 int currentPos = (estChar-1)*6+dnaChar;
264 currentPlif = qualityScoresAllPaths[z][currentPos];
265 for(int pidx=0; pidx<currentPlif.len; pidx++) {
266 qScores[ctr] = currentPlif.penalties[pidx];
267 ctr++;
268 }
269 }}}
270 }
271
272 //for(idx=0; idx<numPathsPlifs; idx++) {
273 // currentPlif = qualityScoresAllPaths[idx];
274 // //printf("Size is %dx%d\n",numPathsPlifs,currentPlif.len);
275 // for(pidx=0; pidx<currentPlif.len; pidx++) {
276 // qScores[ctr] = currentPlif.penalties[pidx];
277 // ctr++;
278 // }
279 //}
280
281 //printf("\nctr is %d\n",ctr);
282 //printf("Leaving getAlignmentResults...\n");
283 }
284
285 void Alignment::getAlignmentArrays(int* dna_align, int* est_align) {
286
287 int idx;
288 for(idx=0; idx<result_len; idx++) {
289 dna_align[idx] = DNA_ARRAY[idx];
290 est_align[idx] = EST_ARRAY[idx];
291 }
292 }