b0VIM 7.0wOG4pfabiocongo~fabio/svn/projects/QPalma/python/modsel_paths.m
3210#"! UtpiVjJadia0/]*
'
omk
lF
Z
M
? > 4 W/ srq`D'}}QI.<,$# don_supp = Donors{id} ; %NoiseMatrix = Noises{id} ; exons = Exons{id} ; end continue; fprintf('%d was skipped, because %d MB of memory was needed\n',id,memory_MB); if memory_MB > ARGS.MAX_MEM memory_MB = round(30*num_path(id)*length(dna)*length(est)/1024/1024)+300; est = Ests{id} ; dna = Sequences{id} ; %variables for this id if (mod(id,50)==0), fprintf(1,'.'); end %fprintf('%i\n', id) ; for id = 1:N fprintf('\nIteration step: %i of %i\n', step_nr, iteration_steps) ; ticfor step_nr=1:iteration_steps,gap = zeros(1,N) ; %? sum of differences between true alignment score and 'false' alignment scoresnum_path = anzpath*ones(1,N) ; %nr of alignments done (best path, secondbest path etc.)xis = zeros(1,N) ; %initialize slack variablesdisp('Training parameters ...');%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Training%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%end end Donors{i}(Donors{i}>20)=1 ; Acceptors{i}(Acceptors{i}>20)=1 ; for i=1:length(Acceptors)if ~ARGS.train_with_splicesitescoreinformation%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% delete splicesitescoreinformation%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%[h,d,a,mmatrix] = set_param_palma(param,ARGS.train_with_intronlengthinformation) ;[paramfilename,param,A,b] = paths_init_param(ARGS,genome_info,N);ARGS.C_qp_smallness = C_qp_smallness;ARGS.C_lp_smoothness = C_lp_smoothness;ARGS.C_qp_smoothness = C_qp_smoothness;ARGS.C_qp_penalties = C_qp_penalties;[C_qp_penalties,C_qp_smoothness,C_lp_smoothness,C_qp_smallness,column_eps,Q,f,LB,UB] = paths_create_qp(N,ARGS.C);lpenv = cplex_license(0) ;%qp_solve[random_N,iteration_steps,remove_duplicate_scores,anzpath,print_matrix] = paths_set_const;N = length(Sequences) ; fprintf('Number of training examples: %i\n', N) ; %training%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Constants %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%[Sequences, Acceptors, Donors, Exons, Ests, Noises] = paths_load_data('training',genome_info,ARGS);fprintf('%s\n', genome_info.basedir) ;genome_info = init_genome(gen_file) ;gen_file=sprintf('%s/genome.config', ARGS.basedir) fprintf('Initializing genome in ') ;ARGS = paths_set_param(ARGS);end ARGS = [];if ~exist('ARGS','var')%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 126x Nx% f = ([0 ... 0] [1/N 1/N ... 1/N])%% res = [30 ..., 30 ..., 30 ..., 36 ..., 126 ..., N xis] % % with%% s.t. %% min ( + 1/2 res' Q res)%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% s.t. theta * v_i_true  theta * v_i_false >= 1  xi_i forAll i in N% % min ( 1/N * sum_i=1^N (xi_i) + C * P(theta) )%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% N xis% 126 help parameters ((ab)^2)% 126 (3*30 + 36) thetas% number of trained parameters: res = 2*126 + N% Also added a check whether rproc jobs crashed.% It is exactly the same, except added two more flags, C and LOCAL_ALIGN% This file was modified from train_paths_local_rproc by Cheng Soon Ong on 1 Nov 2006adJ?8*#
Z
;
0
(
j>/% ogbJ

w
T
v 2 ) Jwtga[Z:fprintf('Training completed\n');assert(length(take_idx)==N)assert(all(maxgap<=column_eps))end ; toc pause(1) ; end ; break if max(gap)<=column_eps, 'param', 'h', 'd', 'a', 'mmatrix', 'A', 'b') ; save(paramfilename, ... [h,d,a,mmatrix] = set_param_palma(param,ARGS.train_with_intronlengthinformation) ; [mean(xis>=1) mean(gap>=1) mean(xis>=1e3) mean(gap>=1e3)] maxgap=max(gap) sum_xis = sum(xis) [param,xis] = paths_qp_solve(lpenv,Q,f,A,b,LB,UB); fprintf('\n') ; end end ; gap(id) = 0 ; else gap(id) = 1sum((Weights(1,:)Weights(2,:)).*param)xis(id) ; 1] ; b=[b; Weights(2,:)Weights(1,:) zeros(1,126) e] ; A=[A; e(id) = 1 ; e=zeros(1,N) ; if size(Weights,1)>1 & sum((Weights(1,:)Weights(2,:)).*param)+xis(id)<1+column_eps, %if there is a false alignment Weights = Weights([1 min(find(true_map==0))], :) ; %Choose true and first false alignment for extending A end ; num_path(id)=num_path(id)+1 ; %next iteration step: one alignment more if all(true_map==1) end assert(abs(Weights(z,:) * param(1:126)' AlignmentScores(z)) <= 1e6) ; %abs: absolute value for z = 1:num_path(id) %%%set_param_palma should have done this right end ; keyboard warning('true score > max score\n') ; AlignmentScores if AlignmentScores(1) >= max(AlignmentScores(2:end))+1e6, % maximal one WHYYYYY? % the true label sequence should not have a larger score than the end end true_map(pfadNo+1)=1 ; if norm(Weights(1,:)Weights(pfadNo+1,:))<1e5, assert(abs(Gesamtscores(pfadNo)  AlignmentScores(pfadNo+1)) < 1e6) ; %Test, ob Alignprogr. auch das richtige Ergebnis liefert: AlignmentScores(pfadNo+1) = Weights(pfadNo+1,:) * [h.penalties' ; d.penalties' ; a.penalties' ; mmatrix(:)] ; Weights(pfadNo+1,:) = [weightIntron, weightDon, weightAcc, weightMatch(pfadNo,:)] ; %Gewichte in restliche Zeilen der Matrix speichern path_loss(pfadNo+1) = sum(double(SpliceAlign(pfadNo,:))~=true_SpliceAlign) ; %not too simple? compute_SpliceWeights(d, a, h, SpliceAlign(pfadNo,:), don_supp, acc_supp) ;ad)Vcsm
+
KF'"(#
S
O
J
J @:~UTSI)bt'lgS6o\E% [weightDon, weightAcc, weightIntron] = ... est_numbers = dnaest{2,pfadNo} ; dna_numbers = dnaest{1,pfadNo} ; for pfadNo = 1:num_path(id) path_loss(1) = 0 ; path_loss=[] ; true_map(1) = 1 ; true_map = zeros(1,1+num_path(id)) ; %Berechne Gewichte der durch Alignment berechneten Pfade %keyboard %length(dnaest{1,1}) %exons(3,2)exons(1,1) %print_align(dnaest,1) ; %just some info assert(all(dna(find(SpliceAlign(1,:)==1)) == 'g')) ; %assert that it is the right file end %reshape(weightMatch(pfadNo,:),6,6) %reshape(new_weightMatch,6,6) assert(sum(new_weightMatch(7:end)) == sum(SpliceAlign(pfadNo,:)==0)) ; assert(all(new_weightMatch == weightMatch(pfadNo,:))) ; end end new_weightMatch(dnaest{1,pfadNo}(iii)*6 + dnaest{2,pfadNo}(iii) + 1) = new_weightMatch(dnaest{1,pfadNo}(iii)*6 + dnaest{2,pfadNo}(iii)+1) + 1 ; if dnaest{2,pfadNo}(iii) ~= 6 for iii = 1:length(dnaest{1,pfadNo}) new_weightMatch = zeros(1,36) ; assert(sum(weightMatch(pfadNo,7:end)) == sum(SpliceAlign(pfadNo,:)==0)) ; for pfadNo = 1:num_path(id) %test weightMatch = double(weightMatch') ; SpliceAlign = double(SpliceAlign') ; %column %return values (are int32, have to be double later cause we compute scores %acceptor, remove_duplicate_scores, print_matrix); %[SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest] = myalign_local(1, dna, est, {h}, mmatrix, donor, remove_duplicate_scores, print_matrix); myalign_local(num_path(id), dna, est, {h}, mmatrix, donor, acceptor, ... [SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest] = ... %call myalign acceptor = [acceptor(2:end) Inf] ; %myalign wants the acceptor site on the g of the ag [donor, acceptor] = compute_donacc(don_supp, acc_supp, d, a) ; % Compute donor, acceptor with penalty_lookup_new %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Wrong Alignments %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% AlignmentScores(1) = Weights(1,:) * [h.penalties' ; d.penalties' ; a.penalties' ; mmatrix(:)] ; AlignmentScores = zeros(1, num_path(id)+1) ; %first entry: true alignment %Score of whole alignment Weights(1,:) = [true_weightIntron, true_weightDon, true_weightAcc, true_weightMatch] ; Weights = zeros(num_path(id)+1, 126) ; %first line: true alignment, rest: wrong alignments % Berechne Gewichtsmatrix fuer aktuelle id (matrix anlegen) double(true_weightMatch) ; compute_SpliceWeights(d, a, h, true_SpliceAlign, don_supp, acc_supp) ; [true_weightDon, true_weightAcc, true_weightIntron] = ... [true_SpliceAlign, true_weightMatch] = compute_SpliceAlign_local(dna, exons) ; %Berechne die Parameter des wirklichen Alignments (but with untrained d,a,h ...) %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % True Alignment and Comparison with wrong ones %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% assert(all(dna(idx_acc(idx_acc>2)1)=='g')) ; assert(all(dna(idx_acc(idx_acc>3)2)=='a')) ; idx_acc = find(acc_supp~=Inf) ; assert(all(dna(idx_don(idx_don