+ added python parsers for gff and solexa / read files
[qpalma.git] / QPalmaDP / qpalma_dp.cpp
1 #include "qpalma_dp.h"
2 #include <cstring>
3 using namespace std;
4
5 /*[splice_align, est_align, weightMatch, alignmentscores, dnaest] = ...
6 myalign([nr_paths], dna, est, {h}, matchmatrix, donor, acceptor, remove_duplicate_scores, ...
7 print_matrix) */
8
9 struct penalty_struct *qualityScores = 0;
10
11 Alignment::Alignment() {
12 len = 0;
13 limits = 0;
14 penalties = 0;
15 max_len = 0;
16 min_len = 0;
17 cache = 0;
18 enum ETransformType transform = T_LINEAR;
19 id = 0;
20 name = 0;
21 use_svm = 0;
22
23 splice_align = 0;
24 est_align = 0;
25 mmatrix_param = 0;
26 alignmentscores = 0;
27 qualityMatrix = 0;
28
29 ::qualityScores = new struct penalty_struct[24];
30 }
31
32 void Alignment::setQualityMatrix(double* qMat, int length){
33 if(qualityMatrix == 0)
34 qualityMatrix = new double[length];
35
36 for(int i=0; i<length; i++)
37 qualityMatrix[i] = qMat[i];
38 }
39
40
41 void Alignment::setMatchPlifs(struct penalty_struct match_plifs, int idx) {
42 struct penalty_struct *currentPlif = new struct penalty_struct();
43 copy_penalty_struct(&match_plifs,currentPlif);
44 ::qualityScores[idx] = (*currentPlif);
45 }
46
47 void Alignment::getDNAEST(){}
48
49 void Alignment::myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
50 int est_len_p, struct penalty_struct h, double* matchmatrix, int mm_len,
51 double* donor, int d_len, double* acceptor, int a_len,
52 bool remove_duplicate_scores, bool print_matrix) {
53
54 // printf("Entering myalign...\n");
55 mlen = 6; // score matrix: length of 6 for "- A C G T N"
56 dna_len = dna_len_p + 1 ;
57 est_len = est_len_p + 1 ;
58 nr_paths = nr_paths_p;
59
60 /***************************************************************************/
61 // initialize alignment matrices and call fill_matrix()
62 /***************************************************************************/
63
64 //possible donor positions
65 int nr_donor_sites = 0 ;
66 for (int ii=0; ii<d_len; ii++) {
67 if(isfinite(donor[ii])) {
68 nr_donor_sites++ ;
69 }
70 }
71
72 int* donor_sites = new int[nr_donor_sites];
73 int donor_idx = 0 ;
74 for (int ii=0; ii<d_len; ii++) {
75 if(isfinite(donor[ii])) {
76 donor_sites[donor_idx] = ii+1 ;
77 donor_idx++ ;
78 }
79 }
80
81 int* max_score_positions = new int[nr_paths*2];
82
83 Pre_score** matrices = new Pre_score*[nr_paths];
84 for (int z=0; z<nr_paths; z++) {
85 matrices[z] = new Pre_score[dna_len * est_len];
86 }
87
88 // printf("calling fill_matrix...\n");
89
90 fill_matrix(nr_paths, matrices, est_len, dna_len, est, dna, &h, matchmatrix, donor, acceptor, remove_duplicate_scores, nr_donor_sites, donor_sites, max_score_positions);
91
92 // printf("after call to fill_matrix...\n");
93 /***************************************************************************/
94 // return arguments etc.
95 /***************************************************************************/
96 int result_length; //Eine Variable fuer alle Matrizen
97
98 // printf("before init of splice_align and rest...\n");
99
100 splice_align = new int[(dna_len-1)*nr_paths];
101 est_align = new int[(est_len-1)*nr_paths];
102 mmatrix_param = new int[(mlen*mlen)*nr_paths];
103 alignmentscores = new double[nr_paths]; //alignment score for each path/matrix
104
105 // printf("before memset...\n");
106
107 memset((char*)splice_align, -1, (dna_len-1)*nr_paths*sizeof(int)); // fills splice_align with zeros
108 memset((char*)est_align, -1, (est_len-1)*nr_paths*sizeof(int)); // fills est_align with zeros
109 memset((char*)mmatrix_param, 0, mlen*mlen*nr_paths*sizeof(int)); //fills mmatrix_param with zeros
110 memset(alignmentscores, -1, nr_paths*sizeof(double)); //fills alignmentscores with zeros
111
112 // printf("after memset...\n");
113 // dnaest
114 double *DNA_ARRAY = 0;
115 double *EST_ARRAY = 0;
116
117 for (int z=0; z<nr_paths; z++) {
118 result_length = 0 ;
119
120 int* s_align = splice_align + (dna_len-1)*z; //pointer
121 int* e_align = est_align + (est_len-1)*z ; //pointer
122 int* mparam = mmatrix_param + (mlen*mlen)*z; //pointer
123
124 bool no_more_path = result_align(matrices, z, est_len, dna_len, &result_length, est, dna, s_align, e_align, mparam, alignmentscores, max_score_positions);
125
126 if(DNA_ARRAY != 0) {
127 delete[] DNA_ARRAY;
128 delete[] EST_ARRAY;
129 }
130
131 DNA_ARRAY = new double[result_length];
132 EST_ARRAY = new double[result_length];
133
134 //backtracking
135 int i = max_score_positions[2*z] ; //i (est)
136 int j = max_score_positions[2*z +1] ; //j (dna)
137 int prev_i = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_i;
138 int prev_j = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_j;
139 int prev_m_no = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_matrix_no;
140
141 for (int ii=result_length; ii>0; ii--) {
142 if ((prev_i == (i-1)) && (prev_j == (j-1))) { // match or mismatch
143 DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
144 EST_ARRAY[ii-1] = check_char(est[i-1]) ;
145 }
146 else if ((prev_i == (i)) && (prev_j == (j-1))) {// gap on EST_ARRAY
147 DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
148 EST_ARRAY[ii-1] = 0 ; //gap
149 }
150 else if ((prev_i == (i-1)) && (prev_j == (j))) { // gap on DNA_ARRAY
151 DNA_ARRAY[ii-1] = 0 ; //gap
152 EST_ARRAY[ii-1] = check_char(est[i-1]) ;
153 }
154 else {// splice site
155 for (j; j > prev_j; j--) {
156 DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
157 EST_ARRAY[ii-1] = 6 ; //intron
158 ii-- ;
159 }
160 ii++ ; // last ii-- too much (because done in for-loop)
161 }
162
163 i = prev_i;
164 j = prev_j;
165 prev_i = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_i;
166 prev_j = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_j;
167 prev_m_no = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_matrix_no;
168 }
169
170 } //end of z
171
172 if(DNA_ARRAY != 0) {
173 delete[] DNA_ARRAY;
174 delete[] EST_ARRAY;
175 }
176
177 for (int i=nr_paths-1;i>=0; i--)
178 delete[] matrices[i];
179
180 //printf("Leaving myalign...\n");
181 }
182
183 void Alignment::getAlignmentResults(int* s_align, int* e_align,
184 int* mmatrix_p, double* alignscores) {
185
186 // printf("Entering getAlignmentResults...\n");
187 uint splice_align_size = (dna_len-1)*nr_paths;
188 uint est_align_size = (est_len-1)*nr_paths;
189 uint mmatrix_param_size = (mlen*mlen)*nr_paths;
190 uint alignmentscores_size = nr_paths; //alignment score for each path/matrix
191
192 for(int idx=0; idx<splice_align_size; idx++)
193 s_align[idx] = splice_align[idx];
194
195 for(int idx=0; idx<est_align_size; idx++)
196 e_align[idx] = est_align[idx];
197
198 for(int idx=0; idx<mmatrix_param_size; idx++)
199 mmatrix_p[idx] = mmatrix_param[idx];
200
201 for(int idx=0; idx<alignmentscores_size; idx++)
202 alignscores[idx] = alignmentscores[idx];
203
204 // printf("Leaving getAlignmentResults...\n");
205 }
206