prhs - Array of pointers to input data. */
Alignment::Alignment() {
-
len = 0;
limits = 0;
penalties = 0;
Alignment::~Alignment() {}
-void Alignment::myalign(int nr_paths, char* dna, int dna_len, char* est,
- int est_len, struct penalty_struct h, double* matchmatrix, int mm_len,
- double* donor, int d_len, double* acceptor, int a_len,
- bool remove_duplicate_scores, bool print_matrix) {}
-
+void Alignment::setQualityMatrix(double* qualityMat, int length){}
void Alignment::getSpliceAlign(){}
void Alignment::getEstAlign(){}
void Alignment::getWeightMatch(){}
void Alignment::getTotalScores(){}
void Alignment::getDNAEST(){}
-#if 0
-void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
-{
-
-
+void Alignment::myalign(int nr_paths, char* dna, int dna_len, char* est,
+ int est_len, struct penalty_struct h, double* matchmatrix, int mm_len,
+ double* donor, int d_len, double* acceptor, int a_len,
+ bool remove_duplicate_scores, bool print_matrix) {
- /***************************************************************************/
- // right number of arguments?
- /***************************************************************************/
- if(nrhs != 9)
- {
- mexPrintf("10 input arguments please: \n [splice_align, est_align, weightMatch, alignmentscores, dnaest] = myalign(nr_paths, dna, est, cellarray, matchmatrix, donor, acceptor, remove_duplicate_scores, print_matrix)\n");
- return ;
- }
- if(nlhs != 5)
- {
- mexPrintf("5 output arguments please\n [splice_align, est_align, weightMatch, alignmentscores, dnaest] = myalign(nr_paths, dna, est, cellarray, matchmatrix, donor, acceptor, remove_duplicate_scores, print_matrix)\n");
- return ;
- }
-
-
/***************************************************************************/
// variables
/***************************************************************************/
int arg = 0 ; // counter variable
int failure ; //
-
- /***************************************************************************/
- // 0. nr_paths
- /***************************************************************************/
- int nr_paths = (int)*mxGetPr(prhs[arg]); // 0. input (double)
- arg++; // arg -> 1
-
-
- /***************************************************************************/
- // 1. dna
- /***************************************************************************/
- int dna_len = mxGetN(prhs[arg]) + 1; // mxGetN: number of columns (int)
- char* dna = (char*)mxCalloc(dna_len, sizeof(char));
-
- failure = mxGetString(prhs[arg], dna, dna_len); // NULL at end of string
- if (failure) { // mxGetString returns 0 if success, 1 on failure
- mexPrintf("couldn't open dna");
- return;
- }
- arg++; // arg -> 2
-
-
- /***************************************************************************/
- // 2. est
- /***************************************************************************/
- int est_len = mxGetN(prhs[arg]) + 1;
- char* est = (char*)mxCalloc(est_len, sizeof(char));
-
- failure = mxGetString(prhs[arg], est, est_len);
- if (failure) {
- mexPrintf("couldn't open est");
- return;
- }
- arg++; // arg -> 3
-
-
+ penalty_struct* functions;
+#if 0
/***************************************************************************/
// 3. h (cell array)
/***************************************************************************/
- penalty_struct* functions;
mwSize anz_func = 1;
functions = read_penalty_struct_from_cell(prhs[arg], anz_func);
// mexPrintf("entry %d: <%f>\n", i, matchmatrix[i]);
//}
arg++; // arg -> 5
-
-
- /***************************************************************************/
- // 5. donor
- /***************************************************************************/
- int donor_length = mxGetN(prhs[arg]);
- double *donor;
- donor = mxGetPr(prhs[arg]);
- //mexPrintf("Donor eingelesen: <%f>\n", donor[0]);
- arg++; // arg -> 6
-
-
- /***************************************************************************/
- // 6. acceptor
- /***************************************************************************/
- int acceptor_length = mxGetN(prhs[arg]);
- double *acceptor;
- acceptor = mxGetPr(prhs[arg]);
- //mexPrintf("Acceptor eingelesen: <%f>\n", acceptor[0]);
- arg++; // arg -> 7
-
-
- /***************************************************************************/
- // 7.remove_duplicate_scores
- /***************************************************************************/
- bool remove_duplicate_scores = (bool)*mxGetPr(prhs[arg]);
- arg++; // arg -> 8
-
-
- /***************************************************************************/
- // 8. print_matrix
- /***************************************************************************/
- int print_matrix = (int)*mxGetPr(prhs[arg]);
- arg++; // arg -> 9
-
+#endif
/***************************************************************************/
// initialize alignment matrices and call fill_matrix()
//possible donor positions
int nr_donor_sites = 0 ;
- for (int ii=0; ii<donor_length; ii++) {
+ for (int ii=0; ii<d_len; ii++) {
if(isfinite(donor[ii])) {
nr_donor_sites++ ;
}
}
- //mwSize donor_sites[nr_donor_sites] ;
- mwSize* donor_sites = (mwSize*)mxCalloc(nr_donor_sites,sizeof(mwSize));
+
+ int* donor_sites = new int[nr_donor_sites];
int donor_idx = 0 ;
- for (int ii=0; ii<donor_length; ii++) {
+ for (int ii=0; ii<d_len; ii++) {
if(isfinite(donor[ii])) {
donor_sites[donor_idx] = ii+1 ;
donor_idx++ ;
}
}
+ int* max_score_positions = new int[nr_paths*2];
- //int max_score_positions[nr_paths*2]; //local alignment: where is the highest alignment score [i1,j1,i2,j2...]
- mwSize* max_score_positions = (mwSize*)mxCalloc(nr_paths*2,sizeof(mwSize));
-
- //Pre_score* matrices[nr_paths];
- Pre_score** matrices = (Pre_score**)mxCalloc(nr_paths,sizeof(Pre_score*));
+ Pre_score** matrices = new Pre_score*[nr_paths];
for (int z=0; z<nr_paths; z++) {
- //matrices[z] = new Pre_score[dna_len * est_len];
- //mexPrintf("size of one matrice: %d\n", est_len*dna_len*sizeof(Pre_score)) ;
- matrices[z] = (Pre_score*)mxCalloc(dna_len*est_len,sizeof(Pre_score));
+ matrices[z] = new Pre_score[dna_len * est_len];
}
fill_matrix(nr_paths, matrices, est_len, dna_len, est, dna, functions, matchmatrix, donor, acceptor, remove_duplicate_scores, nr_donor_sites, donor_sites, max_score_positions);
- //just some info
- //for (int z=0; z<nr_paths; z++) {
- // mexPrintf("(%d, %d)\n", max_score_positions[2*z], max_score_positions[2*z +1]) ;
- //}
-
/***************************************************************************/
// return arguments etc.
/***************************************************************************/
int result_length; //Eine Variable fuer alle Matrizen
- //int splice_align[(dna_len-1)*nr_paths]; //SpliceAlign for DNA - 1 line per matrix
- //int est_align[(est_len-1)*nr_paths]; //SpliceAlign for EST - 1 line per matrix
- //int mmatrix_param[(mlen*mlen)*nr_paths];//Matrix fuer Scorematrixparameter - 1 Zeile pro Matrix
- int* splice_align = (int*)mxCalloc((dna_len-1)*nr_paths,sizeof(int));
- int* est_align = (int*)mxCalloc((est_len-1)*nr_paths,sizeof(int));
- int* mmatrix_param = (int*)mxCalloc((mlen*mlen)*nr_paths,sizeof(int));
+ int* splice_align = new int[(dna_len-1)*nr_paths];
+ int* est_align = new int[(est_len-1)*nr_paths];
+ int* mmatrix_param = new int[(mlen*mlen)*nr_paths];
double alignmentscores[nr_paths]; //alignment score for each path/matrix
-
-
- memset((char*)splice_align, -1, (dna_len-1)*nr_paths*sizeof(int)); // fills splice_align with zeros
- memset((char*)est_align, -1, (est_len-1)*nr_paths*sizeof(int)); // fills est_align with zeros
- memset((char*)mmatrix_param, 0, mlen*mlen*nr_paths*sizeof(int)); //fills mmatrix_param with zeros
- memset(alignmentscores, -1, nr_paths*sizeof(double)); //fills alignmentscores with zeros
-
- // dnaest
- plhs[nlhs-1] = mxCreateCellMatrix(2, nr_paths);
-
-
- //mexPrintf("DEBUG: decoding %d paths\n",nr_paths);
+ //memset((char*)splice_align, -1, (dna_len-1)*nr_paths*sizeof(int)); // fills splice_align with zeros
+ //memset((char*)est_align, -1, (est_len-1)*nr_paths*sizeof(int)); // fills est_align with zeros
+ //memset((char*)mmatrix_param, 0, mlen*mlen*nr_paths*sizeof(int)); //fills mmatrix_param with zeros
+ //memset(alignmentscores, -1, nr_paths*sizeof(double)); //fills alignmentscores with zeros
+
for (int z=0; z<nr_paths; z++) {
result_length = 0 ;
bool no_more_path = result_align(matrices, z, est_len, dna_len, &result_length, est, dna, s_align, e_align, mparam, alignmentscores, max_score_positions);
// dnaest
- mxArray* mxdna = mxCreateDoubleMatrix(result_length, 1, mxREAL);
- mxArray* mxest = mxCreateDoubleMatrix(result_length, 1, mxREAL);
- double *DNA = mxGetPr(mxdna) ;
- double *EST = mxGetPr(mxest) ;
- mxSetCell(plhs[nlhs-1], z*2, mxdna) ;
- mxSetCell(plhs[nlhs-1], z*2+1, mxest) ;
+ double *DNA = new double[result_length];
+ double *EST = new double[result_length];
//backtracking
- mwSize i = max_score_positions[2*z] ; //i (est)
- mwSize j = max_score_positions[2*z +1] ; //j (dna)
- mwSize prev_i = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_i;
- mwSize prev_j = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_j;
- mwSize prev_m_no = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_matrix_no;
+ int i = max_score_positions[2*z] ; //i (est)
+ int j = max_score_positions[2*z +1] ; //j (dna)
+ int prev_i = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_i;
+ int prev_j = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_j;
+ int prev_m_no = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_matrix_no;
for (int ii=result_length; ii>0; ii--) {
if ((prev_i == (i-1)) && (prev_j == (j-1))) { // match or mismatch
}
} //end of z
-
-
-
- //mexPrintf("DEBUG:Returning arguments\n");
- int index = 0 ;
-
- //splice_align (mxArray):
- const mwSize dims0[] = {(dna_len-1), nr_paths};
- //const int dims0[] = {(dna_len-1), nr_paths};
- plhs[index] = mxCreateNumericArray(2, dims0, mxINT32_CLASS, mxREAL);
- mwSize* start0 = (mwSize*)(mxGetData(plhs[index]));
- memcpy(start0, splice_align, mxGetM(plhs[index])*mxGetN(plhs[index])*mxGetElementSize(plhs[index]));
- index++ ; // ->1
-
- //est_align (mxArray):
- const mwSize dims1[] = {(est_len-1), nr_paths};
- //const int dims1[] = {(est_len-1), nr_paths};
- plhs[index] = mxCreateNumericArray(2, dims1, mxINT32_CLASS, mxREAL);
- mwSize* start1 = (mwSize*)(mxGetData(plhs[index]));
- memcpy(start1, est_align, mxGetM(plhs[index])*mxGetN(plhs[index])*mxGetElementSize(plhs[index]));
- index++ ; // ->2
-
- //weightmatch (mxArray)
- const mwSize dims2[] = {mlen*mlen, nr_paths};
- //const int dims2[] = {mlen*mlen, nr_paths};
- plhs[index] = mxCreateNumericArray(2, dims2, mxINT32_CLASS, mxREAL);
- mwSize* start2= (mwSize*)(mxGetData(plhs[index]));
- memcpy(start2, mmatrix_param, mxGetM(plhs[index])*mxGetN(plhs[index])*mxGetElementSize(plhs[index]));
- index++ ; // ->3
-
- //alignmentscore
- plhs[index] = mxCreateDoubleMatrix(1,nr_paths,mxREAL);
- if (plhs[index] == NULL)
- {
- printf("%s : Out of memory on line %d\n", __FILE__, __LINE__);
- printf("Unable to create mxArray.\n");
- exit(1);
- }
- double* start3 = (double*)(mxGetData(plhs[index]));
- memcpy(start3, alignmentscores, mxGetM(plhs[index])*mxGetN(plhs[index])*mxGetElementSize(plhs[index]));
- index++ ; // ->4
-
- assert(nlhs==index+1) ;
-
- //Free Memory
- //mexPrintf("DEBUG:Free Memory\n");
- mxFree(donor_sites);
- mxFree(max_score_positions);
- for (int i=nr_paths-1;i>=0; i--)
- {
- //delete[] matrices[i];
- mxFree(matrices[i]);
- }
- mxFree(matrices);
- mxFree(splice_align);
- mxFree(est_align);
- mxFree(mmatrix_param);
- mxFree(dna);
- mxFree(est);
-
- return;
-
}
-#endif
from computeSpliceWeights import *
import QPalmaDP
+from SIQP_CPX import SIQPSolver
from penalty_lookup import *
from compute_donacc import *
"""
A training method for the QPalma project
-Overall :
+Overall procedure:
- 1. Load the data
- via paths_load_data
-
+ 1. Load the data -> via paths_load_data
2. Create a QP using SIQP (paths_create_qp)
-
3. Set initial params using set_param_palma -> palma_utils
-
4. computeSpliceAlign
-
5. compute_SpliceAlignLocal
-
6. computeSpliceWeights
-
7. myalign_local
"""
random_N = 100 ; # number of constraints to generate per iteration
iteration_steps = 200 ; #upper bound on iteration steps
- # myalign
remove_duplicate_scores = 0
anzpath = 2
print_matrix = 0
#############################################################################################
# Training
#############################################################################################
- print 'Starting training...'
-
- #The alignment matrix M looks like
-
- # DNA
-
- # A C G T -
- # ------------
- # A
- #
- #E C
- #S
- #T G
-
- # T
-
- # -
-
-
- #where
-
- # - is a gap,
+ plog(logfh,'Starting training...')
+ numExamples = N
+ C=1.0
numDonorSupportPoints = 30
numAcceptorSupportPoints = 30
numIntronLengthSupportPoints = 30
- numQualityScoreSupportPoints = 0
+ sizeMMatrix = 36
+ numQualityScoreSupportPoints = 16*0
numFeatures = numDonorSupportPoints\
+ numAcceptorSupportPoints\
+ numIntronLengthSupportPoints\
+ + sizeMMatrix\
+ numQualityScoreSupportPoints
- numExamples = N
-
- from SIQP_CPX import SIQPSolver
- C=1.0
+ qualityMatrix = [0.0]*numQualityScoreSupportPoints
- logfile = open('qpalma.log','w+')
+ plog(logfh,'Initializing problem...\n')
#problem = SIQPSolver(numFeatures,numExamples,C,logfile)
-
- #xis = zeros(1,N) ; #ninitialize slack variables
#num_path = anzpath*ones(1,N) ; # nr of alignments done (best path, second-best path etc.)
#gap = zeros(1,N) ; %? sum of differences between true alignment score and 'false' alignment scores
+ #for step_nr=1:iteration_steps,
+ plog(logfh,'Starting training...\n')
- #for step_nr=1:iteration_steps,
iteration_nr = 1
while True:
for exampleId in range(numExamples):
if exampleId % 1000 == 0:
print 'Current example nr %d' % exampleId
+
dna = Sequences[exampleId]
est = Ests[exampleId]
# Berechne die Parameter des wirklichen Alignments (but with untrained d,a,h ...)
trueSpliceAlign, trueWeightMatch = computeSpliceAlign(dna, exons)
+ # Calculate
trueWeightDon, trueWeightAcc, trueWeightIntron = computeSpliceWeights(d, a, h, trueSpliceAlign, don_supp, acc_supp)
+ # Reshape currentW param
+ currentW = zeros((numFeatures,1))
+ currentW[0:numDonorSupportPoints,1] = trueWeigthDon[:]
+ currentW[0:numAcceptorSupportPoints,1] = trueWeigthAcc[:]
+ currentW[0:numIntronLengthSupportPoints,1] = trueWeigthIntron[:]
+ currentW[0:numDonorSupportPoints,1] = trueWeigthMatch[:]
+
+ #currentPhi = d.penalties a h mmatrix
+
+ # Calculate w'phi(x,y) the total score of the alignment
+ #alignmentScore = currentW.T * currentPhi
+
+ #
+ # Calculate wrong alignments
#
- # compute wrong alignments
- #
- nr_paths = 1
+ nr_paths = 2
dna = 'acgtagct'
dna_len = len(dna)
print_matrix = False
currentAlignment = QPalmaDP.Alignment()
+
+ qualityMat = QPalmaDP.createDoubleArrayFromList(qualityMatrix)
+ currentAlignment.setQualityMatrix(qualityMat,numQualitySupportPoints)
ps = h.convert2SWIG()
currentAlignment.myalign( nr_paths, dna, dna_len,\
logfh.close()
"""
- for id = 1:N
- %fprintf('%i\n', id) ;
- if (mod(id,50)==0), fprintf(1,'.'); end
-
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- % True Alignment and Comparison with wrong ones
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
- %Berechne die Parameter des wirklichen Alignments (but with untrained d,a,h ...)
- [true_SpliceAlign, true_weightMatch] = compute_SpliceAlign_local(dna, exons) ;
- [true_weightDon, true_weightAcc, true_weightIntron] = ...
- compute_SpliceWeights(d, a, h, true_SpliceAlign, don_supp, acc_supp) ;
-
- double(true_weightMatch) ;
-
- % Berechne Gewichtsmatrix fuer aktuelle id (matrix anlegen)
- Weights = zeros(num_path(id)+1, 126) ; %first line: true alignment, rest: wrong alignments
- Weights(1,:) = [true_weightIntron, true_weightDon, true_weightAcc, true_weightMatch] ;
-
- %Score of whole alignment
- AlignmentScores = zeros(1, num_path(id)+1) ; %first entry: true alignment
- AlignmentScores(1) = Weights(1,:) * [h.penalties' ; d.penalties' ; a.penalties' ; mmatrix(:)] ;
-
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Wrong Alignments
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
myalign_local(num_path(id), dna, est, {h}, mmatrix, donor, acceptor, ...
remove_duplicate_scores, print_matrix);
- %[SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest] = myalign_local(1, dna, est, {h}, mmatrix, donor,
- %acceptor, remove_duplicate_scores, print_matrix);
-
%return values (are int32, have to be double later cause we compute scores
SpliceAlign = double(SpliceAlign') ; %column
weightMatch = double(weightMatch') ;
-
%test
for pfadNo = 1:num_path(id)
assert(sum(weightMatch(pfadNo,7:end)) == sum(SpliceAlign(pfadNo,:)==0)) ;
end
assert(all(new_weightMatch == weightMatch(pfadNo,:))) ;
assert(sum(new_weightMatch(7:end)) == sum(SpliceAlign(pfadNo,:)==0)) ;
- %reshape(new_weightMatch,6,6)
- %reshape(weightMatch(pfadNo,:),6,6)
end
%assert that it is the right file
assert(all(dna(find(SpliceAlign(1,:)==1)) == 'g')) ;
%just some info
- %print_align(dnaest,1) ;
- %exons(3,2)-exons(1,1)
- %length(dnaest{1,1})
- %keyboard
%Berechne Gewichte der durch Alignment berechneten Pfade
true_map = zeros(1,1+num_path(id)) ;
end
fprintf('\n') ;
#################################################################################
- const_added = solver.addConstraint(deltas, loss, pos, marginRescaling)
+ const_added = solver.addConstraint(deltas, loss, pos, marginRescaling)
+
objValue,w,self.slacks = solver.solve()
-
- # [param,xis] = paths_qp_solve(lpenv,Q,f,A,b,LB,UB);
- # sum_xis = sum(xis)
- # maxgap=max(gap)
- [mean(xis>=1) mean(gap>=1) mean(xis>=1e-3) mean(gap>=1e-3)]
-
- if max(gap)<=column_eps:
- break
- assert(all(maxgap<=column_eps))
- assert(length(take_idx)==N)
+ sum_xis = 0
+ for elem in self.slacks:
+ sum_xis += elem
+
"""
+
print 'Training completed'
if __name__ == '__main__':
run()
-
- """
- def __init__(self):
- numAcceptorParams = 0
- numDonorParams = 0
- numIntronLengthParams = 30
-
- # how many supporting points do we have for the
- # quality function per base
- numQualityParamsPerBase = 5
-
- # we have quality params for each possible
- # for all possible combinations of
- # {A,C,G,T} x {A,C,G,T,-}
- # 4 x 5
- numQualityParams = numQualityParamsPerBase * 4 * 5
-
- # for all possible combinations of
- # {A,C,G,T} x {A,C,G,T,-,*} plus
- # {-} x {A,C,G,T,-}
- # 1 x 5
- numParams = numQualityParams + 1*5
- """