void fill_matrix(int nr_paths, Pre_score* matrices[], int est_len /*counter var: i*/, int dna_len /*counter var: j*/, char* est, char* dna, penalty_struct* functions , double* matchmatrix, double* donor, double* acceptor, bool remove_duplicate_scores, int nr_donor_sites, int* donor_sites, int* max_score_positions)
{
+ printf("Entering fill_matrix...\n");
/*********************************************************************************************/
// variables
double prevValue ;
double tempValue;
int max_len = (int)functions->limits[functions->len-1] ; //last (len) supporting point of h
- //printf("max_len: %d\n", max_len) ;
+ printf("max_len: %d\n", max_len) ;
int num_elem ; // counter variable
//free memory
delete[] array;
delete[] scores;
-
+
+ printf("Leaving fill_matrix...\n");
return;
}
#include "qpalma_dp.h"
+#include <cstring>
+using namespace std;
/*[splice_align, est_align, weightMatch, alignmentscores, dnaest] = ...
myalign([nr_paths], dna, est, {h}, matchmatrix, donor, acceptor, remove_duplicate_scores, ...
Alignment::~Alignment() {}
-void Alignment::setQualityMatrix(double* qualityMat, int length){}
+void Alignment::setQualityMatrix(double* qMat, int length){
+ qualityMatrix = new double[length];
+ for(int i=0; i<length; i++)
+ qualityMatrix[i] = qMat[i];
+}
void Alignment::getSpliceAlign(){}
void Alignment::getEstAlign(){}
void Alignment::getWeightMatch(){}
void Alignment::getTotalScores(){}
void Alignment::getDNAEST(){}
-void Alignment::myalign(int nr_paths, char* dna, int dna_len, char* est,
- int est_len, struct penalty_struct h, double* matchmatrix, int mm_len,
+void Alignment::myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
+ int est_len_p, struct penalty_struct h, double* matchmatrix, int mm_len,
double* donor, int d_len, double* acceptor, int a_len,
bool remove_duplicate_scores, bool print_matrix) {
-
- /***************************************************************************/
- // variables
- /***************************************************************************/
- const int mlen = 6; // score matrix: length of 6 for "- A C G T N"
- int arg = 0 ; // counter variable
- int failure ; //
+ printf("Entering myalign...\n");
+
+ mlen = 6; // score matrix: length of 6 for "- A C G T N"
+ dna_len = dna_len_p;
+ est_len = est_len_p;
+ nr_paths = nr_paths_p;
- penalty_struct* functions;
#if 0
/***************************************************************************/
// 3. h (cell array)
matrices[z] = new Pre_score[dna_len * est_len];
}
- fill_matrix(nr_paths, matrices, est_len, dna_len, est, dna, functions, matchmatrix, donor, acceptor, remove_duplicate_scores, nr_donor_sites, donor_sites, max_score_positions);
+ printf("calling fill_matrix...\n");
+
+ fill_matrix(nr_paths, matrices, est_len, dna_len, est, dna, &h, matchmatrix, donor, acceptor, remove_duplicate_scores, nr_donor_sites, donor_sites, max_score_positions);
+ printf("after call to fill_matrix...\n");
/***************************************************************************/
// return arguments etc.
/***************************************************************************/
int result_length; //Eine Variable fuer alle Matrizen
- int* splice_align = new int[(dna_len-1)*nr_paths];
- int* est_align = new int[(est_len-1)*nr_paths];
- int* mmatrix_param = new int[(mlen*mlen)*nr_paths];
- double alignmentscores[nr_paths]; //alignment score for each path/matrix
+ splice_align = new int[(dna_len-1)*nr_paths];
+ est_align = new int[(est_len-1)*nr_paths];
+ mmatrix_param = new int[(mlen*mlen)*nr_paths];
+ alignmentscores[nr_paths]; //alignment score for each path/matrix
- //memset((char*)splice_align, -1, (dna_len-1)*nr_paths*sizeof(int)); // fills splice_align with zeros
- //memset((char*)est_align, -1, (est_len-1)*nr_paths*sizeof(int)); // fills est_align with zeros
- //memset((char*)mmatrix_param, 0, mlen*mlen*nr_paths*sizeof(int)); //fills mmatrix_param with zeros
- //memset(alignmentscores, -1, nr_paths*sizeof(double)); //fills alignmentscores with zeros
+ memset((char*)splice_align, -1, (dna_len-1)*nr_paths*sizeof(int)); // fills splice_align with zeros
+ memset((char*)est_align, -1, (est_len-1)*nr_paths*sizeof(int)); // fills est_align with zeros
+ memset((char*)mmatrix_param, 0, mlen*mlen*nr_paths*sizeof(int)); //fills mmatrix_param with zeros
+ memset(alignmentscores, -1, nr_paths*sizeof(double)); //fills alignmentscores with zeros
for (int z=0; z<nr_paths; z++) {
result_length = 0 ;
}
} //end of z
+
+ printf("Store return arguments...\n");
+
+
+ // spliceAlign
+ // estAlign
+ // weightmatch
+ // alignmentScore
+ // dnaest
+
+
+ printf("Leaving myalign...\n");
}
+
+void Alignment::getAlignmentResults(int* s_align, int* e_align,
+ int* mmatrix_p, double* alignscores) {
+
+ printf("Entering getAlignmentResults...\n");
+ uint splice_align_size = (dna_len-1)*nr_paths;
+ uint est_align_size = (est_len-1)*nr_paths;
+ uint mmatrix_param_size = (mlen*mlen)*nr_paths;
+ uint alignmentscores_size = nr_paths; //alignment score for each path/matrix
+
+ for(int idx=0; idx<splice_align_size; idx++)
+ s_align[idx] = splice_align[idx];
+
+ for(int idx=0; idx<est_align_size; idx++)
+ e_align[idx] = est_align[idx];
+
+ for(int idx=0; idx<mmatrix_param_size; idx++)
+ mmatrix_p[idx] = mmatrix_param[idx];
+
+ for(int idx=0; idx<alignmentscores_size; idx++)
+ alignscores[idx] = alignmentscores[idx];
+
+ printf("Leaving getAlignmentResults...\n");
+}
+
class Alignment {
private:
- int SpliceAlign;
- int EstAlign;
- int WeightMatch;
- int TotalScores;
+ int* splice_align;
+ int* est_align;
+ int* mmatrix_param;
+ double* alignmentscores;
+
+ double* qualityMatrix;
+
+ int dna_len;
+ int est_len;
+ int mlen;
+ int nr_paths;
INT len;
REAL *limits;
Alignment();
~Alignment();
- void myalign(int nr_paths, char* dna, int dna_len, char* est,
- int est_len, struct penalty_struct h, double* matchmatrix, int mm_len,
- double* donor, int d_len, double* acceptor, int a_len,
+void myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
+ int est_len_p, struct penalty_struct h, double* matchmatrix, int mm_len,
+ double* donor, int d_len, double* acceptor, int a_len,
bool remove_duplicate_scores, bool print_matrix);
void penSetLength(int l) { len = l; }
limits[i] = lts[i];
}
- void setQualityMatrix(double* qualityMat, int length);
+ void setQualityMatrix(double* qMat, int length);
void getSpliceAlign();
void getEstAlign();
void getWeightMatch();
void getTotalScores();
void getDNAEST();
+ void getAlignmentResults(int* s_align, int* e_align,
+ int* mmatrix_p, double* alignscores);
};
--- /dev/null
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+class Param:
+
+ def __init__(self):
+ """ default parameters """
+
+ self.basedir = '/fml/ag-raetsch/share/projects/qpalma/elegans_palma'
+ self.MAX_MEM = 31000;
+ self.LOCAL_ALIGN = 1;
+ self.init_param = 1;
+ self.train_with_splicesitescoreinformation = 1;
+ self.train_with_intronlengthinformation = 1;
+ self.C = 0.001;
+ self.microexon = 0;
+ self.prob = 0;
+ self.organism = 'elegans'
+ self.expt = 'training'
+
+ self.insertion_prob = self.prob/3 ;
+ self.deletion_prob = self.prob/3 ;
+ self.mutation_prob = self.prob/3 ;
from numpy.matlib import zeros
import pdb
-
def computeSpliceAlign(dna, exons):
"""
Exonpos: Anfang Exon bis 1 rechts neben Ende Exon
numberOfExons = (exons.shape)[0] # how many rows ?
exonSizes = [-1]*numberOfExons
-
for idx in range(numberOfExons):
exonSizes[idx] = exons[idx,1] - exons[idx,0]
+genome_info.basedir is /fml/ag-raetsch/share/projects/palma/elegans_uncut/
+Number of training examples: 4604
+Starting training...Initializing problem...
+Starting training...
# Calculate w'phi(x,y) the total score of the alignment
alignmentScore = currentW.T * currentPhi
- #
- # Calculate wrong alignments
- #
+ ################## Calculate wrong alignment(s) ######################
# Compute donor, acceptor with penalty_lookup_new
# returns two double lists
donor, acceptor = compute_donacc(don_supp, acc_supp, d, a)
#myalign wants the acceptor site on the g of the ag
- #acceptor = [acceptor(2:end) -Inf] ;
acceptor = acceptor[1:]
acceptor.append(-inf)
currentAlignment.setQualityMatrix(qualityMat,numQualSuppPoints)
ps = h.convert2SWIG()
- pdb.set_trace()
-
+ # returns [SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest]
currentAlignment.myalign( nr_paths, dna, dna_len,\
est, est_len, ps, matchmatrix, mm_len, donor, d_len,\
acceptor, a_len, remove_duplicate_scores, print_matrix)
+ newSpliceAlign = QPalmaDP.createIntArrayFromList([0]*(dna_len*nr_paths))
+ newEstAlign = QPalmaDP.createIntArrayFromList([0]*(est_len*nr_paths))
+ newWeightMatch = QPalmaDP.createIntArrayFromList([0]*(mm_len*nr_paths))
+ newAlignmentScores = QPalmaDP.createDoubleArrayFromList([.0]*nr_paths)
+
+ currentAlignment.getAlignmentResults(newSpliceAlign, newEstAlign, newWeightMatch, newAlignmentScores)
+
+ print newSpliceAlign
+
print 'after myalign call...'
- # SpliceAlign = double(SpliceAlign') ; %column
- # weightMatch = double(weightMatch') ;
+ if iteration_nr == 2:
+ break
- break
iteration_nr += 1
break
logfh.close()
"""
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- % Wrong Alignments
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- %test
- for pfadNo = 1:num_path(id)
- assert(sum(weightMatch(pfadNo,7:end)) == sum(SpliceAlign(pfadNo,:)==0)) ;
- new_weightMatch = zeros(1,36) ;
- for iii = 1:length(dnaest{1,pfadNo})
- if dnaest{2,pfadNo}(iii) ~= 6
- new_weightMatch(dnaest{1,pfadNo}(iii)*6 + dnaest{2,pfadNo}(iii) + 1) = new_weightMatch(dnaest{1,pfadNo}(iii)*6 + dnaest{2,pfadNo}(iii)+1) + 1 ;
- end
- end
- assert(all(new_weightMatch == weightMatch(pfadNo,:))) ;
- assert(sum(new_weightMatch(7:end)) == sum(SpliceAlign(pfadNo,:)==0)) ;
- end
-
- %assert that it is the right file
- assert(all(dna(find(SpliceAlign(1,:)==1)) == 'g')) ;
-
- %just some info
-
- %Berechne Gewichte der durch Alignment berechneten Pfade
- true_map = zeros(1,1+num_path(id)) ;
- true_map(1) = 1 ;
- path_loss=[] ;
- path_loss(1) = 0 ;
- for pfadNo = 1:num_path(id)
- dna_numbers = dnaest{1,pfadNo} ;
- est_numbers = dnaest{2,pfadNo} ;
-
- [weightDon, weightAcc, weightIntron] = ...
- compute_SpliceWeights(d, a, h, SpliceAlign(pfadNo,:), don_supp, acc_supp) ;
-
- path_loss(pfadNo+1) = sum(double(SpliceAlign(pfadNo,:))~=true_SpliceAlign) ; %not too simple?
-
- %Gewichte in restliche Zeilen der Matrix speichern
- Weights(pfadNo+1,:) = [weightIntron, weightDon, weightAcc, weightMatch(pfadNo,:)] ;
-
- AlignmentScores(pfadNo+1) = Weights(pfadNo+1,:) * [h.penalties' ; d.penalties' ; a.penalties' ; mmatrix(:)] ;
-
- %Test, ob Alignprogr. auch das richtige Ergebnis liefert:
- assert(abs(Gesamtscores(pfadNo) - AlignmentScores(pfadNo+1)) < 1e-6) ;
-
- if norm(Weights(1,:)-Weights(pfadNo+1,:))<1e-5,
- true_map(pfadNo+1)=1 ;
- end
- end
-
- % the true label sequence should not have a larger score than the
- % maximal one WHYYYYY?
- if AlignmentScores(1) >= max(AlignmentScores(2:end))+1e-6,
- AlignmentScores
- warning('true score > max score\n') ;
- keyboard
- end ;
-
- %%%set_param_palma should have done this right
- for z = 1:num_path(id)
- assert(abs(Weights(z,:) * param(1:126)' -AlignmentScores(z)) <= 1e-6) ; %abs: absolute value
- end
-
- if all(true_map==1)
- num_path(id)=num_path(id)+1 ; %next iteration step: one alignment more
- end ;
-
- %Choose true and first false alignment for extending A
- Weights = Weights([1 min(find(true_map==0))], :) ;
-
- %if there is a false alignment
- if size(Weights,1)>1 & sum((Weights(1,:)-Weights(2,:)).*param)+xis(id)<1+column_eps,
- e=zeros(1,N) ;
- e(id) = 1 ;
- A=[A;
- Weights(2,:)-Weights(1,:) zeros(1,126) -e] ;
- b=[b;
- -1] ;
- gap(id) = 1-sum((Weights(1,:)-Weights(2,:)).*param)-xis(id) ;
- else
- gap(id) = 0 ;
- end ;
- end
- fprintf('\n') ;
- #################################################################################
-
const_added = solver.addConstraint(deltas, loss, pos, marginRescaling)
objValue,w,self.slacks = solver.solve()