+ removed several index bugs
[qpalma.git] / tools / calculateAlignmentQuality.m
index a26989b..84bc4d4 100644 (file)
@@ -1,52 +1,80 @@
-load confirmed_sequences.mat;
+%
+% This script compares two alignments based on their EST overlaps
+%
+% The ground_truth and testrun
+%
+load /fml/ag-raetsch/home/fabio/tmp/zebrafish/confirmed_sequences.mat;
 testrun = genes;
 load /fml/ag-raetsch/share/projects/altsplicedata/zebrafish/confirmed_sequences.mat;
 ground_truth = genes;
 clear genes;
 
+fh = fopen('overlapping.pos','w+')
 for i = 1:length(ground_truth)
-
-   currentTru = ground_truth(i);
-   currentTruExons = currentTru.exons{1};
+   currentEntry = ground_truth(i);
+   currentExons = currentEntry.exons;
+   assert (length(currentEntry.transcripts) == length(currentEntry.exons));
+   numberOfEsts = length(currentEntry.transcripts);
 
    for j = 1:length(testrun)
       currentPred = testrun(j);
 
-      if strcmp(currentTru.chr,currentPred.chr) && strcmp(currentTru.strands,currentPred.strands)
+      if strcmp(currentEntry.chr,currentPred.chr) && currentEntry.is_alt_spliced
          continue;
       end
 
-      truStart = currentTru.start;
-      truStop  = currentTru.stop;
+      start = currentEntry.start;
+      stop  = currentEntry.stop;
       predStart = currentPred.start;
       predStop = currentPred.stop;
 
-      if predStop < truStart || predStart > truStop
+      % if both entries are not overlapping at all
+      if predStop < start || predStart > stop
          continue;
       end
 
-      currentPredExons = currentPred.exons{1};
-
-      rows = size(currentTruExons,1);
-      for ii = 1:rows
-         exonStart = currentTruExons(ii,1);
-         exonStop = currentTruExons(ii,2);
-         for jj = 1:size(currentPredExons,1)
-            predExonStart = currentPredExons(jj,1);
-            predExonStop = currentPredExons(jj,2);
-
-            if exonStart >= predExonStart && exonStart <= predExonStop
-               disp('Overlapping');
-            % end of exonth is nested inside predExoniction
-            elseif exonStop >= predExonStart && exonStop <= predExonStop
-               disp('Overlapping');
-            % predExoniction is nested inside exonth
-            elseif exonStart <= predExonStart && predExonStop <= exonStop
-               disp('Overlapping');
-            else
-               d=1;
+      currentPredExons = currentPred.exons;
+      numberOfPredEsts = length(currentPredExons);
+
+      % Loop over all ESTs in ground truth and testrun
+      for estIdx = 1:numberOfEsts
+         %disp(sprintf('estIdx %i ',estIdx));
+         numberOfExons = size(currentExons{estIdx},1);
+
+         for exonIdx = 1:numberOfExons
+            %disp(sprintf('exonIdx %i ',exonIdx));
+            exonStart = currentExons{estIdx}(exonIdx,1);
+            exonStop  = currentExons{estIdx}(exonIdx,2);
+
+            for predEstIdx = 1:numberOfPredEsts
+            %disp(sprintf('predEstIdx %i ',predEstIdx));
+            numberOfPredExons = size(currentPredExons{predEstIdx},1);
+            currentESTName = currentPred.transcripts{predEstIdx};
+
+               for predExonIdx = 1:numberOfPredExons
+                  %%disp(sprintf('predExonIdx %i ',predExonIdx));
+                  predExonStart = currentPredExons{predEstIdx}(predExonIdx,1);
+                  predExonStop  = currentPredExons{predEstIdx}(predExonIdx,2);
+
+                  %disp('\n');
+                  %%disp(sprintf('%i %i %i %i %i %i\n',i,j,estIdx,predEstIdx,exonIdx,predExonIdx));
+
+                  if exonStart >= predExonStart && exonStart <= predExonStop
+                     %%disp('Overlapping');
+                     fprintf(fh,sprintf('%s before %d\n',currentESTName,exonStart-predExonStart))
+                  % end of exonth is nested inside predExoniction
+                  elseif exonStop >= predExonStart && exonStop <= predExonStop
+                     %%disp('Overlapping');
+                     fprintf(fh,sprintf('%s after %d\n',currentESTName,predExonStop-exonStop))
+                  % predExoniction is nested inside exonth
+                  elseif exonStart <= predExonStart && predExonStop <= exonStop
+                     %disp('Overlapping');
+                     fprintf('%s %d %d',fh)
+                  else
+                     d=1;
+                  end
+               end
             end
-            
          end
       end
    end