% % This script compares two alignments based on their EST overlaps % % The ground_truth and testrun % load /fml/ag-raetsch/home/fabio/tmp/zebrafish/confirmed_sequences.mat; testrun = genes; load /fml/ag-raetsch/share/projects/altsplicedata/zebrafish/confirmed_sequences.mat; ground_truth = genes; clear genes; disp('Loaded data...'); fh = fopen('overlapping.pos','w+'); for i = 1:length(ground_truth) currentEntry = ground_truth(i); currentExons = currentEntry.exons; assert (length(currentEntry.transcripts) == length(currentEntry.exons)); numberOfEsts = length(currentEntry.transcripts); if mod(i,100) == 0 fprintf('.') end for j = 1:length(testrun) currentPred = testrun(j); if ~strcmp(currentEntry.chr,currentPred.chr) || currentEntry.is_alt_spliced continue; end start = currentEntry.start; stop = currentEntry.stop; predStart = currentPred.start; predStop = currentPred.stop; % if both entries are not overlapping at all if predStop < start || predStart > stop continue; end currentPredExons = currentPred.exons; numberOfPredEsts = length(currentPredExons); % Loop over all ESTs in ground truth and testrun for estIdx = 1:numberOfEsts %disp(sprintf('estIdx %i ',estIdx)); numberOfExons = size(currentExons{estIdx},1); for exonIdx = 1:(numberOfExons-1) intronStart = currentExons{estIdx}(exonIdx,2); intronStop = currentExons{estIdx}(exonIdx+1,1); for predEstIdx = 1:numberOfPredEsts numberOfPredExons = size(currentPredExons{predEstIdx},1); currentESTName = currentPred.transcripts{predEstIdx}; for predExonIdx = 1:numberOfPredExons %%disp(sprintf('predExonIdx %i ',predExonIdx)); predExonStart = currentPredExons{predEstIdx}(predExonIdx,1); predExonStop = currentPredExons{predEstIdx}(predExonIdx,2); %disp('\n'); %%disp(sprintf('%i %i %i %i %i %i\n',i,j,estIdx,predEstIdx,exonIdx,predExonIdx)); % est is covering full intron if intronStart > predExonStart && intronStop < predExonStop fprintf(fh,sprintf(' %s is completely overlapping from %d %d\n',currentESTName,intronStart,intronStop)); % est is nested inside intron elseif intronStart < predExonStart && intronStop > predExonStop fprintf(fh,sprintf(' %s is completely inside intron from %d %d\n',currentESTName,predExonStart,predExonStop)); % end of exonth is nested inside predExoniction elseif intronStart > predExonStart && predExonStop > intronStart && intronStop > predExonStop fprintf(fh,sprintf('%s is upstream overlapping from %d %d\n',currentESTName,intronStart,predExonStop)); % predExoniction is nested inside exonth elseif intronStart < predExonStart && predExonStart < intronStop && intronStop < predExonStop fprintf(fh,sprintf('%s is downstream overlapping from %d %d\n',currentESTName,predExonStart,intronStop)); else d=1; end end end end end end end