+ renamed dyn_prog directory
[qpalma.git] / DynProg / qpalma_dp.cpp
1
2 #include "qpalma_dp.h"
3 #include <cstring>
4
5 using namespace std;
6
7 /*[splice_align, est_align, weightMatch, alignmentscores, dnaest] = ...
8 myalign([nr_paths], dna, est, {h}, matchmatrix, donor, acceptor, remove_duplicate_scores, ...
9 print_matrix) */
10
11 Alignment::Alignment(int numQPlifs, int numq, bool use_qscores) {
12 len = 0;
13 limits = 0;
14 penalties = 0;
15 max_len = 0;
16 min_len = 0;
17 cache = 0;
18 //enum ETransformType transform = T_LINEAR;
19 id = 0;
20 name = 0;
21 use_svm = 0;
22
23 // set ptrs to zero first
24 splice_align = 0;
25 est_align = 0;
26 mmatrix_param = 0;
27 alignmentscores = 0;
28 qualityFeaturesAllPaths = 0;
29 mlen = 6; // score matrix: length of 6 for "- A C G T N"
30
31 numQualSuppPoints = numq;
32 numPlifs = numQPlifs;
33 use_quality_scores = use_qscores;
34
35 //printf("number of support points: %d\n",numQualSuppPoints);
36 //printf("number of plifs: %d\n",numPlifs );
37 FA( numQualSuppPoints >= 0 );
38 FA( numPlifs >= 0 );
39 }
40
41 void Alignment::getDNAEST(){}
42
43 void Alignment::myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
44 int est_len_p, double* prb, double* chastity, struct penalty_struct h, double* matchmatrix, int mm_len,
45 double* donor, int d_len, double* acceptor, int a_len, struct penalty_struct* qualityScores,
46 bool remove_duplicate_scores, bool print_matrix) {
47
48 // printf("Entering myalign...\n");
49 dna_len = dna_len_p + 1 ;
50 est_len = est_len_p + 1 ;
51 nr_paths = nr_paths_p;
52
53 /***************************************************************************/
54 // initialize alignment matrices and call fill_matrix()
55 /***************************************************************************/
56
57 mode currentMode;
58 if (use_quality_scores)
59 currentMode = USE_QUALITY_SCORES;
60 else
61 currentMode = NORMAL;
62
63 // dnaest
64 DNA_ARRAY = 0;
65 EST_ARRAY = 0;
66
67 //printf("[qpalma_dp] read is: %s\n",est);
68
69 //possible donor positions
70 int nr_donor_sites = 0 ;
71 for (int ii=0; ii<d_len; ii++) {
72 if(isfinite(donor[ii])) {
73 nr_donor_sites++ ;
74 }
75 }
76
77 int* donor_sites = new int[nr_donor_sites];
78 int donor_idx = 0 ;
79 for (int ii=0; ii<d_len; ii++) {
80 if(isfinite(donor[ii])) {
81 donor_sites[donor_idx] = ii+1 ;
82 donor_idx++ ;
83 }
84 }
85
86 int* max_score_positions = new int[nr_paths*2];
87
88 Pre_score** matrices = new Pre_score*[nr_paths];
89 //for (int z=0; z<nr_paths; z++) {
90 for (size_t z=0; z<nr_paths; z++) {
91 matrices[z] = new Pre_score[dna_len * est_len];
92 }
93
94 //printf("qualityScores\n");
95 //for(int qidx=0;qidx<numPlifs;qidx++) {
96 // penalty_struct p;
97 // p = qualityScores[qidx];
98 // printf("%d ",qidx);
99 // for(int pidx=0;pidx<p.len;pidx++)
100 // printf("%f ",pidx,p.penalties[pidx]);
101 // printf("\n");
102 //}
103
104 //int h_idx;
105 //for(h_idx=0;h_idx<h.len;h_idx++)
106 // printf("%d %f %f\n",h_idx,h.limits[h_idx],h.penalties[h_idx]);
107
108 //printf("calling fill_matrix...\n");
109 fill_matrix(nr_paths, matrices, est_len, dna_len, est, dna, prb, &h, matchmatrix, qualityScores, donor, acceptor, remove_duplicate_scores, nr_donor_sites, donor_sites, max_score_positions);
110 //printf("after call to fill_matrix...\n");
111
112 /***************************************************************************/
113 // return arguments etc.
114 /***************************************************************************/
115 int result_length; //Eine Variable fuer alle Matrizen
116
117 splice_align_size = (dna_len-1)*nr_paths;
118 est_align_size = (est_len-1)*nr_paths;
119
120 int mmatrix_size;
121
122 if (currentMode == USE_QUALITY_SCORES) {
123 mmatrix_param_size = mlen*nr_paths;
124 mmatrix_size = mlen;
125 }
126
127 if (currentMode == NORMAL) {
128 mmatrix_param_size = (mlen*mlen)*nr_paths;
129 mmatrix_size = mlen*mlen;
130 }
131
132 alignmentscores_size = nr_paths; //alignment score for each path/matrix
133 numPathsPlifs = numPlifs*nr_paths; //alignment score for each path/matrix
134
135 splice_align = new int[splice_align_size];
136 est_align = new int[est_align_size];
137 mmatrix_param = new int[mmatrix_param_size];
138 alignmentscores = new double[nr_paths]; //alignment score for each path/matrix
139
140
141 // printf("before memset...\n");
142 memset((char*)splice_align, -1, (dna_len-1)*nr_paths*sizeof(int)); // fills splice_align with zeros
143 memset((char*)est_align, -1, (est_len-1)*nr_paths*sizeof(int)); // fills est_align with zeros
144 memset((char*)mmatrix_param, 0, mmatrix_size*nr_paths*sizeof(int)); //fills mmatrix_param with zeros
145 memset(alignmentscores, -1, nr_paths*sizeof(double)); //fills alignmentscores with zeros
146 //printf("after memset...\n");
147
148 qualityFeaturesAllPaths= new penalty_struct*[nr_paths];
149
150 //for (int z=0; z<nr_paths; z++) {
151 for (size_t z=0; z<nr_paths; z++) {
152 result_length = 0 ;
153
154 int* s_align = splice_align + (dna_len-1)*z; //pointer
155 int* e_align = est_align + (est_len-1)*z ; //pointer
156 int* mparam = mmatrix_param + mmatrix_size*z; //pointer
157
158 qualityFeaturesAllPaths[z] = new penalty_struct[numPlifs];
159
160 int qidx, pidx;
161 for(qidx=0;qidx<numPlifs;qidx++) {
162 penalty_struct p;
163 init_penalty_struct(p);
164 p.len = numQualSuppPoints;
165 p.limits = (double*) calloc(p.len,sizeof(double));
166
167 for(pidx=0;pidx<p.len;pidx++)
168 p.limits[pidx] = qualityScores[qidx].limits[pidx];
169
170 p.penalties = (double*) calloc(p.len,sizeof(double));
171 qualityFeaturesAllPaths[z][qidx] = p;
172 }
173
174 //penalty_struct* qparam = qualityScoresAllPaths + (numPlifs*z);
175
176 //printf("before call to result_align...\n");
177 bool no_more_path = result_align(matrices, z, est_len, dna_len, &result_length, est, dna, prb, chastity, s_align, e_align, mparam, alignmentscores, max_score_positions, qualityFeaturesAllPaths[z] , currentMode);
178 //printf("after call to result_align...\n");
179
180 //printf("z is %d\n",z);
181 //int len;
182 //for(qidx=0;qidx<numPlifs;qidx++) {
183 // penalty_struct p;
184 // p = qualityScoresAllPaths[z][qidx];
185 // printf("%d: ",qidx);
186 // for(pidx=0;pidx<p.len;pidx++)
187 // printf("%f ",p.limits[pidx]);
188 // printf("\n");
189
190 // for(pidx=0;pidx<p.len;pidx++)
191 // printf("%f ",p.penalties[pidx]);
192 // printf("\n");
193 //}
194
195 if(z==0) {
196
197 if(DNA_ARRAY != 0) {
198 delete[] DNA_ARRAY;
199 delete[] EST_ARRAY;
200 }
201
202 result_len = result_length;
203
204 DNA_ARRAY = new int[result_length];
205 EST_ARRAY = new int[result_length];
206
207 //backtracking
208 int i = max_score_positions[2*z] ; //i (est)
209 int j = max_score_positions[2*z +1] ; //j (dna)
210 int prev_i = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_i;
211 int prev_j = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_j;
212 int prev_m_no = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_matrix_no;
213
214 for (int ii=result_length; ii>0; ii--) {
215 if ((prev_i == (i-1)) && (prev_j == (j-1))) { // match or mismatch
216 DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
217 EST_ARRAY[ii-1] = check_char(est[i-1]) ;
218 }
219 else if ((prev_i == (i)) && (prev_j == (j-1))) {// gap on EST_ARRAY
220 DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
221 EST_ARRAY[ii-1] = 0 ; //gap
222 }
223 else if ((prev_i == (i-1)) && (prev_j == (j))) { // gap on DNA_ARRAY
224 DNA_ARRAY[ii-1] = 0 ; //gap
225 EST_ARRAY[ii-1] = check_char(est[i-1]) ;
226 }
227 else {// splice site
228 for (j; j > prev_j; j--) {
229 DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
230 EST_ARRAY[ii-1] = 6 ; //intron
231 ii-- ;
232 }
233 ii++ ; // last ii-- too much (because done in for-loop)
234 }
235
236 i = prev_i;
237 j = prev_j;
238 prev_i = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_i;
239 prev_j = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_j;
240 prev_m_no = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_matrix_no;
241 }
242
243 } // end of "if z == 0"
244
245 } //end of z
246
247 for (int i=nr_paths-1;i>=0; i--)
248 delete[] matrices[i];
249 }
250
251 void Alignment::getAlignmentResults(int* s_align, int* e_align,
252 int* mmatrix_p, double* alignscores, double* qScores) {
253
254 size_t idx;
255 for(idx=0; idx<splice_align_size; idx++)
256 s_align[idx] = splice_align[idx];
257
258 for(idx=0; idx<est_align_size; idx++)
259 e_align[idx] = est_align[idx];
260
261 for(idx=0; idx<mmatrix_param_size; idx++)
262 mmatrix_p[idx] = mmatrix_param[idx];
263
264 for(idx=0; idx<alignmentscores_size; idx++)
265 alignscores[idx] = alignmentscores[idx];
266
267 if (use_quality_scores) {
268 penalty_struct currentPlif;
269 int ctr=0;
270 //for (int z=0; z<nr_paths; z++) {
271 for (size_t z=0; z<nr_paths; z++) {
272 for(int estChar=1;estChar<6;estChar++) {
273 for(int dnaChar=0;dnaChar<6;dnaChar++) {
274
275 int currentPos = (estChar-1)*6+dnaChar;
276 currentPlif = qualityFeaturesAllPaths[z][currentPos];
277
278 for(int pidx=0; pidx<currentPlif.len; pidx++) {
279 qScores[ctr] = currentPlif.penalties[pidx];
280 //printf("%f ",qScores[ctr]);
281 ctr++;
282 }
283 //printf("\n");
284 }}}
285
286 //printf("\nctr is %d\n",ctr);
287 }
288 //printf("Leaving getAlignmentResults...\n");
289 }
290
291 void Alignment::getAlignmentArrays(int* dna_align, int* est_align) {
292
293 size_t idx;
294 for(idx=0; idx<result_len; idx++) {
295 dna_align[idx] = DNA_ARRAY[idx];
296 est_align[idx] = EST_ARRAY[idx];
297 }
298 }
299
300
301 PyObject* Alignment::getAlignmentResultsNew() {
302
303 PyObject* py_splice_align = PyList_New(splice_align_size);
304 PyObject* py_est_align = PyList_New(est_align_size);
305 PyObject* py_mmatrix = PyList_New(mmatrix_param_size);
306 PyObject* py_align_scores = PyList_New(alignmentscores_size);
307 PyObject* py_q_scores = PyList_New(0);
308
309 size_t idx;
310 for(idx=0; idx<splice_align_size; idx++)
311 PyList_SetItem(py_splice_align,idx,PyInt_FromLong(splice_align[idx]));
312 //s_align[idx] = splice_align[idx];
313
314 for(idx=0; idx<est_align_size; idx++)
315 PyList_SetItem(py_est_align,idx,PyInt_FromLong(est_align[idx]));
316
317 //e_align[idx] = est_align[idx];
318
319 for(idx=0; idx<mmatrix_param_size; idx++)
320 PyList_SetItem(py_mmatrix,idx,PyInt_FromLong(mmatrix_param[idx]));
321
322 //mmatrix_p[idx] = mmatrix_param[idx];
323
324 for(idx=0; idx<alignmentscores_size; idx++)
325 PyList_SetItem(py_align_scores,idx,PyFloat_FromDouble(alignmentscores[idx]));
326
327 //alignscores[idx] = alignmentscores[idx];
328
329 if (use_quality_scores) {
330 penalty_struct currentPlif;
331 int ctr=0;
332 for (size_t z=0; z<nr_paths; z++) {
333 //for (int z=0; z<nr_paths; z++) {
334 for(int estChar=1;estChar<6;estChar++) {
335 for(int dnaChar=0;dnaChar<6;dnaChar++) {
336
337 int currentPos = (estChar-1)*6+dnaChar;
338 currentPlif = qualityFeaturesAllPaths[z][currentPos];
339
340 for(int pidx=0; pidx<currentPlif.len; pidx++) {
341 //qScores[ctr] = currentPlif.penalties[pidx];
342 //printf("%f ",qScores[ctr]);
343 PyList_Append(py_q_scores,PyFloat_FromDouble(currentPlif.penalties[pidx]));
344 ctr++;
345 }
346 //printf("\n");
347 }}}
348 //printf("\nctr is %d\n",ctr);
349 }
350
351 PyObject* result = PyTuple_Pack(5,
352 py_splice_align, py_est_align, py_mmatrix,
353 py_align_scores, py_q_scores);
354
355 //PyTuple_SetItem(result,0,py_splice_align);
356 //PyTuple_SetItem(result,1,py_est_align);
357 //PyTuple_SetItem(result,2,py_mmatrix);
358 //PyTuple_SetItem(result,3,py_align_scores);
359 //PyTuple_SetItem(result,4,py_q_scores);
360
361 return result;
362 }
363
364 PyObject* Alignment::getAlignmentArraysNew() {
365
366 PyObject* py_dna_align = PyList_New(result_len);
367 PyObject* py_est_align = PyList_New(result_len);
368
369 for(size_t idx=0; idx<result_len; idx++) {
370 PyList_SetItem(py_dna_align,idx,PyInt_FromLong(DNA_ARRAY[idx]));
371 PyList_SetItem(py_est_align,idx,PyInt_FromLong(EST_ARRAY[idx]));
372 }
373
374 PyObject* result = PyTuple_Pack(2, py_dna_align, py_est_align);
375 return result;
376 }