+ moved tools from training dataset generation to their own folder
[qpalma.git] / tools / calculateAlignmentQuality.m
1 %
2 % This script compares two alignments based on their EST overlaps
3 %
4 % The ground_truth and testrun
5 %
6 load /fml/ag-raetsch/home/fabio/tmp/zebrafish/confirmed_sequences.mat;
7 testrun = genes;
8 load /fml/ag-raetsch/share/projects/altsplicedata/zebrafish/confirmed_sequences.mat;
9 ground_truth = genes;
10 clear genes;
11
12 disp('Loaded data...');
13
14 fh = fopen('overlapping.pos','w+');
15
16 for i = 1:length(ground_truth)
17 currentEntry = ground_truth(i);
18 currentExons = currentEntry.exons;
19 assert (length(currentEntry.transcripts) == length(currentEntry.exons));
20 numberOfEsts = length(currentEntry.transcripts);
21
22 if mod(i,100) == 0
23 fprintf('.')
24 end
25
26 for j = 1:length(testrun)
27 currentPred = testrun(j);
28
29 if ~strcmp(currentEntry.chr,currentPred.chr) || currentEntry.is_alt_spliced
30 continue;
31 end
32
33 start = currentEntry.start;
34 stop = currentEntry.stop;
35 predStart = currentPred.start;
36 predStop = currentPred.stop;
37
38 % if both entries are not overlapping at all
39 if predStop < start || predStart > stop
40 continue;
41 end
42
43 currentPredExons = currentPred.exons;
44 numberOfPredEsts = length(currentPredExons);
45
46 % Loop over all ESTs in ground truth and testrun
47 for estIdx = 1:numberOfEsts
48 %disp(sprintf('estIdx %i ',estIdx));
49 numberOfExons = size(currentExons{estIdx},1);
50
51 for exonIdx = 1:(numberOfExons-1)
52 intronStart = currentExons{estIdx}(exonIdx,2);
53 intronStop = currentExons{estIdx}(exonIdx+1,1);
54
55 for predEstIdx = 1:numberOfPredEsts
56 numberOfPredExons = size(currentPredExons{predEstIdx},1);
57 currentESTName = currentPred.transcripts{predEstIdx};
58
59 for predExonIdx = 1:numberOfPredExons
60 %%disp(sprintf('predExonIdx %i ',predExonIdx));
61 predExonStart = currentPredExons{predEstIdx}(predExonIdx,1);
62 predExonStop = currentPredExons{predEstIdx}(predExonIdx,2);
63
64 %disp('\n');
65 %%disp(sprintf('%i %i %i %i %i %i\n',i,j,estIdx,predEstIdx,exonIdx,predExonIdx));
66
67 % est is covering full intron
68 if intronStart > predExonStart && intronStop < predExonStop
69 fprintf(fh,sprintf(' %s is completely overlapping from %d %d\n',currentESTName,intronStart,intronStop));
70 % est is nested inside intron
71 elseif intronStart < predExonStart && intronStop > predExonStop
72 fprintf(fh,sprintf(' %s is completely inside intron from %d %d\n',currentESTName,predExonStart,predExonStop));
73 % end of exonth is nested inside predExoniction
74 elseif intronStart > predExonStart && predExonStop > intronStart && intronStop > predExonStop
75 fprintf(fh,sprintf('%s is upstream overlapping from %d %d\n',currentESTName,intronStart,predExonStop));
76 % predExoniction is nested inside exonth
77 elseif intronStart < predExonStart && predExonStart < intronStop && intronStop < predExonStop
78 fprintf(fh,sprintf('%s is downstream overlapping from %d %d\n',currentESTName,predExonStart,intronStop));
79 else
80 d=1;
81 end
82 end
83 end
84 end
85 end
86 end
87 end