/* TODO
- max_len = 10000 for elegans!
- do not use double
-
*/
-
#include "fill_matrix.h"
using namespace std;
}
+/* if scoring_functions is a NULL pointer then we realize the "normal" scoring
+ * system of Palma. Otherwiese scoring_functions should point to an array of
+ * plifs scoring the respective pairs of characters together with the EST
+ * quality score.
+ *
+ *
+ */
+
+double getScore(double *matchmatrix,int mlen, int dnaChar, int estChar, struct penalty_struct *scoring_functions) {
+ double score = 0.0;
+
+ if( scoring_functions == 0 ) {
+ if( estChar == -1 )
+ score = matchmatrix[mlen*dnaChar];
+
+ if( dnaChar == -1 )
+ score = matchmatrix[estChar];
+
+ if( estChar != -1 && dnaChar != -1 )
+ score = matchmatrix[mlen* dnaChar +estChar];
+ } else {
+ if( estChar == -1 )
+ score = matchmatrix[mlen*dnaChar];
+
+ if( dnaChar == -1 )
+ score = matchmatrix[estChar];
+
+ if( estChar != -1 && dnaChar != -1 )
+ //scoring_functions
+ score = matchmatrix[mlen* dnaChar +estChar];
+ }
+
+ return score;
+}
void fill_matrix(int nr_paths, Pre_score* matrices[], int est_len /*counter var: i*/, int dna_len /*counter var: j*/, char* est, char* dna, penalty_struct* functions , double* matchmatrix, double* donor, double* acceptor, bool remove_duplicate_scores, int nr_donor_sites, int* donor_sites, int* max_score_positions)
double max_scores[nr_paths] ;
+ /*********************************************************************************************/
+ // The penalty functions
+ /*********************************************************************************************/
+
+ /* scoring_functions is an array of size 4x6
+ *
+ */
+
+ struct penalty_struct* scoring_functions = 0;
+
/*********************************************************************************************/
// Initialisation of all alignment matrices, columnwise
/*********************************************************************************************/
prevValue = ((Pre_score*)actMatrix +(i*dna_len)+j-1)->value ; /* (j-1,i) -> (j,i) */
if (isfinite(prevValue))
{
- tempValue = prevValue +(matchmatrix[mlen* dnaChar]); /* score(gap,DNA) */
+ // tempValue = prevValue +(matchmatrix[mlen* dnaChar]); /* score(gap,DNA) */
+ tempValue = prevValue + getScore(matchmatrix,mlen,dnaChar,-1,scoring_functions);
if (isfinite(tempValue))
{
assert(num_elem<max_num_elem) ;
prevValue = ((Pre_score*)actMatrix +(i-1)*dna_len + j-1)->value ; /* (j-1,i-1) -> (j,i) */
if (isfinite(prevValue))
{
- tempValue = prevValue +(matchmatrix[mlen* dnaChar +estChar]);//DNA mit EST
+ //tempValue = prevValue +(matchmatrix[mlen* dnaChar +estChar]);//DNA mit EST
+ tempValue = prevValue + getScore(matchmatrix,mlen,dnaChar,estChar,scoring_functions);
if (isfinite(tempValue))
{
assert(num_elem<max_num_elem) ;
prevValue = ((Pre_score*)actMatrix +(i-1)*dna_len + j)->value ;
if (isfinite(prevValue))
{
- tempValue = prevValue +(matchmatrix[estChar]); /* score(EST,gap) */
+ // tempValue = prevValue +(matchmatrix[estChar]); /* score(EST,gap) */
+ tempValue = prevValue + getScore(matchmatrix,mlen,-1,estChar,scoring_functions);
if (isfinite(tempValue))
{
assert(num_elem<max_num_elem) ;
extern void print_align(Pre_score* matrix, int length_est, int length_dna, Align_pair* vektor, int result_length, int print_matrix);
+double getScore(double *matchmatrix,int mlen, int dnaChar, int estChar);
+
#endif // _FILLMATRIX_H__
mmatrix_param = 0;
alignmentscores = 0;
qualityMatrix = 0;
+ matching_plifs = 0;
}
void Alignment::setQualityMatrix(double* qMat, int length){
for(int i=0; i<length; i++)
qualityMatrix[i] = qMat[i];
}
-void Alignment::getSpliceAlign(){}
-void Alignment::getEstAlign(){}
-void Alignment::getWeightMatch(){}
-void Alignment::getTotalScores(){}
+
+void Alignment::setMatchPlifs(struct penalty_struct* match_plifs) {
+ matching_plifs = new struct penalty_struct[10];
+
+ for(int i=0; i<24; i++)
+ matching_plifs[i] = match_plifs[i];
+}
+
void Alignment::getDNAEST(){}
void Alignment::myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
bool remove_duplicate_scores, bool print_matrix) {
// printf("Entering myalign...\n");
-
mlen = 6; // score matrix: length of 6 for "- A C G T N"
dna_len = dna_len_p + 1 ;
est_len = est_len_p + 1 ;
int mlen;
int nr_paths;
+ struct penalty_struct* matching_plifs;
+
INT len;
REAL *limits;
REAL *penalties;
delete[] qualityMatrix;
}
-void myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
+ void myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
int est_len_p, struct penalty_struct h, double* matchmatrix, int mm_len,
double* donor, int d_len, double* acceptor, int a_len,
bool remove_duplicate_scores, bool print_matrix);
+ void setMatchPlifs(struct penalty_struct* match_plifs);
+
void penSetLength(int l) { len = l; }
void penSetLimits(REAL* lts) {
limits = new REAL[len];
}
void setQualityMatrix(double* qMat, int length);
- void getSpliceAlign();
- void getEstAlign();
- void getWeightMatch();
- void getTotalScores();
void getDNAEST();
void getAlignmentResults(int* s_align, int* e_align,
int* mmatrix_p, double* alignscores);
-
-
};
-
#endif // _QPALMA_DP_H_
import Configuration
+
+
+def initializeQualityScoringFunctions(numPlifs,numSuppPoints):
+
+ allPlifs = [0]*numPlifs
+
+ for idx in range(numPlifs):
+
+ curPlif = Plf()
+ curPlif.limits = linspace(min_svm_score,max_svm_score,numSuppPoints)
+ curPlif.penalties = [0]*numSuppPoints
+ curPlif.transform = ''
+ curPlif.name = ''
+ curPlif.max_len = 100
+ curPlif.min_len = -100
+ curPlif.id = 1
+ curPlif.use_svm = 0
+ curPlif.next_id = 0
+
+ allPlifs[idx] = d
+
+ return allPlifs
+
+
class QPalma:
"""
A training method for the QPalma project
self.ARGS = Param()
self.logfh = open('qpalma.log','w+')
-
gen_file= '%s/genome.config' % self.ARGS.basedir
cmd = ['']*4
self.C=1.0
+ # 'normal' means work like Palma
+ # 'using_quality_scores' means work like Palma plus using sequencing
+ # quality scores
+ self.mode = 'normal'
+ #self.mode = 'using_quality_scores'
+
+ # Here we specify the total number of parameters.
+ # When using quality scores our scoring function is defined as
+ #
+ # f: S x R x S -> R
+ #
+ # as opposed to a usage without quality scores when we only have
+ #
+ # f: S x S -> R
+ #
self.numDonSuppPoints = 30
self.numAccSuppPoints = 30
self.numLengthSuppPoints = 30
- self.sizeMMatrix = 36
- self.numQualSuppPoints = 16*0
+ if self.mode == 'normal':
+ self.sizeMMatrix = 36
+ elif self.mode == 'using_quality_scores':
+ self.sizeMMatrix = 728
+ else:
+ assert False, 'Wrong operation mode specified'
+
+ # this number defines the number of support points for one tuple (a,b)
+ # where 'a' comes with a quality score
+ self.numQualSuppPoints = 30
+ self.numQualSuppPoints = 0
- self.numFeatures = self.numDonSuppPoints + self.numAccSuppPoints + self.numLengthSuppPoints\
- + self.sizeMMatrix + self.numQualSuppPoints
+ self.numFeatures = self.numDonSuppPoints + self.numAccSuppPoints\
+ + self.numLengthSuppPoints + self.sizeMMatrix
self.plog('Initializing problem...\n')
qualityMatrix = zeros((self.numQualSuppPoints,1))
+ numPlifs = 24
+ numSuppPoints = 30
+ allMatchingPlifs = initializeQualityScoringFunctions(numPlifs,numSuppPoints)
+ # self.numQualSuppPoints
+
#############################################################################################
# Training
#############################################################################################
qualityMat = QPalmaDP.createDoubleArrayFromList(qualityMatrix)
currentAlignment.setQualityMatrix(qualityMat,self.numQualSuppPoints)
+ currentAlignment.setMatchPlifs()
+
#print 'PYTHON: Calling myalign...'
# calculates SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest
currentAlignment.myalign( num_path[exampleIdx], dna, dna_len,\