+ moved tools from training dataset generation to their own folder
[qpalma.git] / tools / calculateAlignmentQuality.m
index 84bc4d4..bab0263 100644 (file)
@@ -9,17 +9,24 @@ load /fml/ag-raetsch/share/projects/altsplicedata/zebrafish/confirmed_sequences.
 ground_truth = genes;
 clear genes;
 
-fh = fopen('overlapping.pos','w+')
+disp('Loaded data...');
+
+fh = fopen('overlapping.pos','w+');
+
 for i = 1:length(ground_truth)
    currentEntry = ground_truth(i);
    currentExons = currentEntry.exons;
    assert (length(currentEntry.transcripts) == length(currentEntry.exons));
    numberOfEsts = length(currentEntry.transcripts);
 
+   if mod(i,100) == 0
+      fprintf('.')
+   end
+
    for j = 1:length(testrun)
       currentPred = testrun(j);
 
-      if strcmp(currentEntry.chr,currentPred.chr) && currentEntry.is_alt_spliced
+      if ~strcmp(currentEntry.chr,currentPred.chr) || currentEntry.is_alt_spliced
          continue;
       end
 
@@ -41,13 +48,11 @@ for i = 1:length(ground_truth)
          %disp(sprintf('estIdx %i ',estIdx));
          numberOfExons = size(currentExons{estIdx},1);
 
-         for exonIdx = 1:numberOfExons
-            %disp(sprintf('exonIdx %i ',exonIdx));
-            exonStart = currentExons{estIdx}(exonIdx,1);
-            exonStop  = currentExons{estIdx}(exonIdx,2);
+         for exonIdx = 1:(numberOfExons-1)
+            intronStart  = currentExons{estIdx}(exonIdx,2);
+            intronStop   = currentExons{estIdx}(exonIdx+1,1);
 
             for predEstIdx = 1:numberOfPredEsts
-            %disp(sprintf('predEstIdx %i ',predEstIdx));
             numberOfPredExons = size(currentPredExons{predEstIdx},1);
             currentESTName = currentPred.transcripts{predEstIdx};
 
@@ -58,18 +63,19 @@ for i = 1:length(ground_truth)
 
                   %disp('\n');
                   %%disp(sprintf('%i %i %i %i %i %i\n',i,j,estIdx,predEstIdx,exonIdx,predExonIdx));
-
-                  if exonStart >= predExonStart && exonStart <= predExonStop
-                     %%disp('Overlapping');
-                     fprintf(fh,sprintf('%s before %d\n',currentESTName,exonStart-predExonStart))
+               
+                  % est is covering full intron
+                  if intronStart > predExonStart && intronStop < predExonStop
+                     fprintf(fh,sprintf(' %s is completely overlapping from %d %d\n',currentESTName,intronStart,intronStop));
+                  % est is nested inside intron
+                  elseif intronStart < predExonStart && intronStop > predExonStop
+                     fprintf(fh,sprintf(' %s is completely inside intron from %d %d\n',currentESTName,predExonStart,predExonStop));
                   % predicted exon starts before the intron and ends inside it (overlaps the intron start)
-                  elseif exonStop >= predExonStart && exonStop <= predExonStop
-                     %%disp('Overlapping');
-                     fprintf(fh,sprintf('%s after %d\n',currentESTName,predExonStop-exonStop))
+                  elseif intronStart > predExonStart && predExonStop > intronStart && intronStop > predExonStop
+                     fprintf(fh,sprintf('%s is upstream overlapping from %d %d\n',currentESTName,intronStart,predExonStop));
                   % predicted exon starts inside the intron and ends after it (overlaps the intron stop)
-                  elseif exonStart <= predExonStart && predExonStop <= exonStop
-                     %disp('Overlapping');
-                     fprintf('%s %d %d',fh)
+                  elseif intronStart < predExonStart && predExonStart < intronStop && intronStop < predExonStop
+                     fprintf(fh,sprintf('%s is downstream overlapping from %d %d\n',currentESTName,predExonStart,intronStop));
                   else
                      d=1;
                   end