+ fixed some index bugs
[qpalma.git] / tools / calculateAlignmentQuality.m
1 %
2 % This script compares two alignments based on their EST overlaps
3 %
4 % The ground_truth and testrun
5 %
6 load /fml/ag-raetsch/home/fabio/tmp/zebrafish/confirmed_sequences.mat;
7 testrun = genes;
8 load /fml/ag-raetsch/share/projects/altsplicedata/zebrafish/confirmed_sequences.mat;
9 ground_truth = genes;
10 clear genes;
11
12 fh = fopen('overlapping.pos','w+')
13 for i = 1:length(ground_truth)
14 currentEntry = ground_truth(i);
15 currentExons = currentEntry.exons;
16 assert (length(currentEntry.transcripts) == length(currentEntry.exons));
17 numberOfEsts = length(currentEntry.transcripts);
18
19 for j = 1:length(testrun)
20 currentPred = testrun(j);
21
22 if ~strcmp(currentEntry.chr,currentPred.chr) || currentEntry.is_alt_spliced
23 continue;
24 end
25
26 start = currentEntry.start;
27 stop = currentEntry.stop;
28 predStart = currentPred.start;
29 predStop = currentPred.stop;
30
31 % if both entries are not overlapping at all
32 if predStop < start || predStart > stop
33 continue;
34 end
35
36 currentPredExons = currentPred.exons;
37 numberOfPredEsts = length(currentPredExons);
38
39 % Loop over all ESTs in ground truth and testrun
40 for estIdx = 1:numberOfEsts
41 %disp(sprintf('estIdx %i ',estIdx));
42 numberOfExons = size(currentExons{estIdx},1);
43
44 for exonIdx = 1:(numberOfExons-1)
45 intronStart = currentExons{estIdx}(exonIdx,2);
46 intronStop = currentExons{estIdx}(exonIdx+1,1);
47
48 for predEstIdx = 1:numberOfPredEsts
49 numberOfPredExons = size(currentPredExons{predEstIdx},1);
50 currentESTName = currentPred.transcripts{predEstIdx};
51
52 for predExonIdx = 1:numberOfPredExons
53 %%disp(sprintf('predExonIdx %i ',predExonIdx));
54 predExonStart = currentPredExons{predEstIdx}(predExonIdx,1);
55 predExonStop = currentPredExons{predEstIdx}(predExonIdx,2);
56
57 %disp('\n');
58 %%disp(sprintf('%i %i %i %i %i %i\n',i,j,estIdx,predEstIdx,exonIdx,predExonIdx));
59
60 % est is covering full intron
61 if intronStart >= predExonStart && intronStop <= predExonStop
62 fprintf(fh,sprintf(' %s is completely overlapping from %d %d\n',currentESTName,intronStart,intronStop))
63 % est is nested inside intron
64 if intronStart < predExonStart && intronStop > predExonStop
65 fprintf(fh,sprintf(' %s is completely inside intron from %d %d\n',currentESTName,predExonStart,predExonStop))
66 % end of exonth is nested inside predExoniction
67 elseif intronStart >= predExonStart && predExonStop >= intronStart && intronStop >= predExonStop
68 fprintf(fh,sprintf('%s is upstream overlapping from %d %d\n',currentESTName,intronStart,predExonStop))
69 % predExoniction is nested inside exonth
70 elseif intronStart <= predExonStart && predExonStart <= intronStop && intronStop <= predExonStop
71 fprintf(fh,sprintf('%s is downstream overlapping from %d %d\n',currentESTName,predExonStart,intronStop))
72 fprintf('%s %d %d',fh)
73 else
74 d=1;
75 end
76 end
77 end
78 end
79 end
80 end
81 end