+ removed several index bugs
authorfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Tue, 4 Dec 2007 14:36:29 +0000 (14:36 +0000)
committerfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Tue, 4 Dec 2007 14:36:29 +0000 (14:36 +0000)
+ added checks to compare against matlab version
+ added convenience functions
TODO
- go on with value checks

git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@6923 e1793c9e-67f9-0310-80fc-b846ff1f7b36

python/.qpalma.py.swp
python/computeSpliceWeights.py
python/compute_donacc.py
python/penalty_lookup_new.py
python/qpalma.log
python/qpalma.py
python/set_param_palma.py
tools/calculateAlignmentQuality.m

index 88f30fa..67c7d20 100644 (file)
Binary files a/python/.qpalma.py.swp and b/python/.qpalma.py.swp differ
index 762b003..20f9a9b 100644 (file)
@@ -4,92 +4,92 @@
 from numpy import inf
 from numpy.matlib import zeros
 
-def calculateWeights(plf, scores):
-   """
-   3 * 30 supporting points: x_1 ... x_30
-   => 3 * 30 parameters (1 .. 90): y_1 ... y_30
-   piecewise linear function:
-
-   take score from SVM vektor (don_supp: case 1, acc_supp: case 2) and compute length of intron: case 3
-   these are our values x
-
-          | y_1                                          if x <= x_1
-          |
-          |        x_i+1 - x              x - x_i
-   f(x) = | y_i * -----------  +  y_i+1 * -----------    if x_i <= x <= x_i+1
-          |       x_i+1 - x_i             x_i+1 - x_i
-          |
-          | y_30                                         if x_n <= x 
-
-    y_i and y_i+1 parameters, so the fractions are saved in the weight vectors!
-   """
+# 3 * 30 supporting points: x_1 ... x_30
+# => 3 * 30 parameters (1 .. 90): y_1 ... y_30
+# piecewise linear function:
+# 
+# take score from SVM vektor (don_supp: case 1, acc_supp: case 2) and compute length of intron: case 3
+# these are our values x
+# 
+#        | y_1                                          if x <= x_1
+#        |
+#        |        x_i+1 - x              x - x_i
+# f(x) = | y_i * -----------  +  y_i+1 * -----------    if x_i <= x <= x_i+1
+#        |       x_i+1 - x_i             x_i+1 - x_i
+#        |
+#        | y_30                                         if x_n <= x 
+# 
+#  y_i and y_i+1 parameters, so the fractions are saved in the weight vectors!
 
+def calculateWeights(plf, scores):
    currentWeight = zeros((30,1))
    
    for k in range(len(scores)):
       value = scores[k]
-      
-      Lower = 0
       Lower = len([elem for elem in plf.limits if elem <= value])
       Upper = Lower+1 ; # x-werte bleiben fest
 
+      print value,Lower,Upper
+
       if Lower == 0:
-         currentWeight[0] = currentWeight[0] + 1;
+         currentWeight[0] = currentWeight[0] + 1
       elif Lower == len(plf.limits):
-         currentWeight[-1] = currentWeight[-1] + 1;
+         currentWeight[-1] = currentWeight[-1] + 1
       else:
-         weightup  = (value - plf.limits[Lower]) / (plf.limits[Upper] - plf.limits[Lower]) ;
-         weightlow = (plf.limits[Upper] - value) / (plf.limits[Upper] - plf.limits[Lower]) ;
-         currentWeight[Upper] = currentWeight[Upper] + weightup;
-         currentWeight[Lower] = currentWeight[Lower] + weightlow;
+         weightup  = (value - plf.limits[Lower]) / (plf.limits[Upper] - plf.limits[Lower])
+         weightlow = (plf.limits[Upper] - value) / (plf.limits[Upper] - plf.limits[Lower])
+         currentWeight[Upper] = currentWeight[Upper] + weightup
+         currentWeight[Lower] = currentWeight[Lower] + weightlow
 
-   return currentWeight
+      print weightup,weightlow,currentWeight[Upper],currentWeight[Lower]
 
+   return currentWeight
 
 def computeSpliceWeights(d, a, h, SpliceAlign, don_supp, acc_supp):
-   #assert(isempty(d.transform))
-   #assert(isempty(a.transform))
-   #assert(isempty(h.transform))
-
    ####################################################################################
    # 1. Donor: In don_supp stehen Werte der SVM., in SpliceAlign die Einsen
    ####################################################################################
 
    # Picke die Positionen raus, an denen eine Donorstelle ist
-   #DonorScores = don_supp(find(SpliceAlign==1)) ;
-   #assert(all(DonorScores~=-Inf)) ;
-
    DonorScores = [elem for pos,elem in enumerate(don_supp) if SpliceAlign[pos] == 1]
    assert not ( -inf in DonorScores )
 
+   print 'donor'
    weightDon = calculateWeights(d,DonorScores)
 
    ####################################################################################
    # 2. Acceptor: In acc_supp stehen Werte der SVM., in SpliceAlign die Einsen
    ####################################################################################
 
-   #AcceptorScores = acc_supp(find(SpliceAlign == 2)+1) ; 
-   #assert(all(AcceptorScores~=-Inf)) ;
-
    #Den Vektor Acceptorstellen durchgehen und die Gewichtsvektoren belasten:
-   AcceptorScores = [elem for pos,elem in enumerate(acc_supp) if SpliceAlign[pos+1] == 2]
-   #assert not ( -inf in AcceptorScores )
+   AcceptorScores = [elem for pos,elem in enumerate(acc_supp) if pos > 0 and SpliceAlign[pos-1] == 2]
+   assert not ( -inf in AcceptorScores )
 
+   print 'acceptor'
    weightAcc = calculateWeights(a,AcceptorScores)
 
    ####################################################################################
    # 3. Intron length: SpliceAlign: Gaps zaehlen und auf Gapgewichte addieren
    ####################################################################################
 
-   #intron_starts =  find(SpliceAlign==1) ;
-   #intron_ends   =  find(SpliceAlign==2) ;
+   intron_starts = []
+   intron_ends   = []
+   for pos,elem in enumerate(SpliceAlign):
+      if elem == 1:
+         intron_starts.append(pos)
+
+      if elem == 2:
+         intron_ends.append(pos)
+
+   assert len(intron_starts) == len(intron_ends)
+
+   for i in range(len(intron_starts)):
+      assert intron_starts[i] < intron_ends[i]
 
-   #%assert: introns do not overlap
-   #assert(length(intron_starts) == length(intron_ends)) ;
-   #assert(all(intron_starts < intron_ends)) ; 
-   #assert(all(intron_ends(1:end-1) < intron_starts(2:end))) ; 
+   values = [0.0]*len(intron_starts)
+   for pos in range(len(intron_starts)):
+      values[pos] = intron_ends[pos] - intron_starts[pos] + 1
 
-   #weightIntron = calculateWeights(h,AcceptorScores)
-   weightIntron = [1,2,3]
+   weightIntron = calculateWeights(h,values)
 
    return weightDon, weightAcc, weightIntron
index b66dfd5..534b1e3 100644 (file)
@@ -2,18 +2,19 @@
 # -*- coding: utf-8 -*-
 
 import math
-from numpy.matlib import zeros
+from numpy.matlib import zeros,inf
+from penalty_lookup_new import *
 
 def compute_donacc(donor_supp, acceptor_supp, d, a):
 
    assert(len(donor_supp)==len(acceptor_supp))
   
-   donor = zeros(len(donor_supp))
-   acceptor= zeros(len(acceptor_supp))
+   donor = zeros((len(donor_supp),1))
+   acceptor= zeros((len(acceptor_supp),1))
 
    for idx in range(len(donor_supp)):
      if donor_supp[idx] == inf:
-       donor[idx] = donor_supp[idx]
+       donor[idx] = donor_supp[idx]
      else:
        donor[idx] = penalty_lookup_new(d, donor_supp[idx])
 
@@ -21,3 +22,5 @@ def compute_donacc(donor_supp, acceptor_supp, d, a):
        acceptor[idx] = acceptor_supp[idx]
      else:
        acceptor[idx] = penalty_lookup_new(a,acceptor_supp[idx])
+
+   return donor,acceptor
index 66df8ee..81ffbc0 100644 (file)
@@ -31,8 +31,10 @@ def penalty_lookup_new(penalty_struct, value):
 
    if count == 0:
       pen = penalties[0]
-   elif count == length(limits):
+   elif count == len(limits):
      pen=penalties[-1]
    else:
      pen = (penalties[count+1]*(value-limits[count]) + penalties[count]\
          *(limits[count+1]-value)) / (limits[count+1]-limits[count])
+
+   return pen
index e69de29..0bf5031 100644 (file)
@@ -0,0 +1,4 @@
+genome_info.basedir is /fml/ag-raetsch/share/projects/palma/elegans_uncut/
+Number of training examples: 4604
+Starting training...Initializing problem...
+Starting training...
index ef82edd..217561b 100644 (file)
@@ -6,7 +6,7 @@ import subprocess
 import scipy.io
 from paths_load_data import *
 import pdb
-import numpy.matlib
+from numpy.matlib import mat,zeros,ones
 from set_param_palma import *
 from computeSpliceAlign import *
 
@@ -14,9 +14,11 @@ from computeSpliceWeights import *
 import QPalmaDP
 from SIQP_CPX import SIQPSolver
 
-from penalty_lookup import *
+from penalty_lookup_new import *
 from compute_donacc import *
 
+from TrainingParam import Param
+
 """
 A training method for the QPalma project
 
@@ -32,26 +34,8 @@ Overall procedure:
 
 """
 
-class Param:
-   
-   def __init__(self):
-      """ default parameters """
-
-      self.basedir = '/fml/ag-raetsch/share/projects/qpalma/elegans_palma'
-      self.MAX_MEM = 31000;
-      self.LOCAL_ALIGN = 1;
-      self.init_param = 1;
-      self.train_with_splicesitescoreinformation = 1;
-      self.train_with_intronlengthinformation = 1;
-      self.C = 0.001;
-      self.microexon = 0;
-      self.prob = 0;
-      self.organism = 'elegans'
-      self.expt = 'training'
-         
-      self.insertion_prob  = self.prob/3 ;
-      self.deletion_prob   = self.prob/3 ;
-      self.mutation_prob   = self.prob/3 ;
+def plog(fh,string):
+   fh.write(string)
 
 def run():
    ARGS = Param()
@@ -92,10 +76,38 @@ def run():
    anzpath = 2
    print_matrix = 0
 
-   param = numpy.matlib.rand(126,1)
+   # param = numpy.matlib.rand(126,1)
+   param = numpy.matlib.mat([[ 0.62870709], [ 0.7012026 ], [ 0.60236784],
+    [ 0.51316259], [ 0.20220814], [ 0.70324863], [ 0.37218684], [ 0.82178927],
+    [ 0.60394866], [ 0.70371272], [ 0.07548074], [ 0.63412803], [ 0.97442266],
+    [ 0.13216791], [ 0.71041168], [ 0.2093887 ], [ 0.35227344], [ 0.3405142 ],
+    [ 0.69677236], [ 0.41673747], [ 0.564245  ], [ 0.37613432], [ 0.88805642],
+    [ 0.88691608], [ 0.69476752], [ 0.81659504], [ 0.17801859], [ 0.71048235],
+    [ 0.08188783], [ 0.54884803], [ 0.84039558], [ 0.6982093 ], [ 0.41686176],
+    [ 0.38568873], [ 0.29401347], [ 0.12704074], [ 0.30640858], [ 0.89578031],
+    [ 0.84621571], [ 0.11783439], [ 0.0944695 ], [ 0.34081575], [ 0.44157643],
+    [ 0.77847185], [ 0.04283567], [ 0.45107823], [ 0.89789891], [ 0.41045519],
+    [ 0.49073531], [ 0.29727627], [ 0.94711483], [ 0.24898204], [ 0.26181212],
+    [ 0.71760957], [ 0.60326883], [ 0.80887576], [ 0.09448718], [ 0.88064525],
+    [ 0.84317654], [ 0.48893703], [ 0.24847021], [ 0.84203596], [ 0.34104156],
+    [ 0.75604701], [ 0.91703057], [ 0.69325475], [ 0.61276969], [ 0.16335226],
+    [ 0.4684374 ], [ 0.16553371], [ 0.79594434], [ 0.6440283 ], [ 0.80922237],
+    [ 0.5349296 ], [ 0.31924316], [ 0.10960695], [ 0.40151062], [ 0.50473641],
+    [ 0.14812671], [ 0.73523169], [ 0.35141625], [ 0.80364238], [ 0.02128181],
+    [ 0.0061226 ], [ 0.34541924], [ 0.07694485], [ 0.05551339], [ 0.23087636],
+    [ 0.87016395], [ 0.31682377], [ 0.27375113], [ 0.72226332], [ 0.62914149],
+    [ 0.59236012], [ 0.2070238 ], [ 0.52390942], [ 0.11894098], [ 0.55725917],
+    [ 0.72706009], [ 0.087196  ], [ 0.04745082], [ 0.95636492], [ 0.31524576],
+    [ 0.79685218], [ 0.80386771], [ 0.70942604], [ 0.82869417], [ 0.26906569],
+    [ 0.51848039], [ 0.64169354], [ 0.07114973], [ 0.39249454], [ 0.07002803],
+    [ 0.94667567], [ 0.02252752], [ 0.01039039], [ 0.5721312 ], [ 0.06065969],
+    [ 0.69422476], [ 0.4310939 ], [ 0.03069099], [ 0.35969779], [ 0.18047331],
+    [ 0.4177651 ], [ 0.01360547], [ 0.29069319]])
 
    [h,d,a,mmatrix] = set_param_palma(param,ARGS.train_with_intronlengthinformation)
 
+   # checked values till this position
+
    # delete splicesite-score-information
    if not ARGS.train_with_splicesitescoreinformation:
       for i in range(len(Acceptors)):
@@ -112,25 +124,22 @@ def run():
    numExamples = N
    C=1.0
 
-   numDonorSupportPoints         = 30
-   numAcceptorSupportPoints      = 30
-   numIntronLengthSupportPoints  = 30 
-   sizeMMatrix                   = 36
-   numQualityScoreSupportPoints  = 16*0
+   numDonSuppPoints     = 30
+   numAccSuppPoints     = 30
+   numLengthSuppPoints  = 30 
+   sizeMMatrix          = 36
+   numQualSuppPoints    = 16*0
 
-   numFeatures = numDonorSupportPoints\
-               + numAcceptorSupportPoints\
-               + numIntronLengthSupportPoints\
-               + sizeMMatrix\
-               + numQualityScoreSupportPoints
+   numFeatures = numDonSuppPoints + numAccSuppPoints + + numLengthSuppPoints\
+               + sizeMMatrix + numQualSuppPoints
 
-   qualityMatrix = [0.0]*numQualityScoreSupportPoints
+   qualityMatrix = zeros((numQualSuppPoints,1))
    
    plog(logfh,'Initializing problem...\n')
+
    #problem = SIQPSolver(numFeatures,numExamples,C,logfile)
-   #num_path = anzpath*ones(1,N) ; # nr of alignments done (best path, second-best path etc.)
-   #gap = zeros(1,N) ; %? sum of differences between true alignment score and 'false' alignment scores
-   #for step_nr=1:iteration_steps,
+   num_path = anzpath*ones((1,N)) ; # nr of alignments done (best path, second-best path etc.)
+   gap = zeros((1,N))
 
    plog(logfh,'Starting training...\n')
 
@@ -152,26 +161,59 @@ def run():
          acc_supp = Acceptors[exampleId] 
 
          # Berechne die Parameter des wirklichen Alignments (but with untrained d,a,h ...)    
+
+         # trueSpliceAlign is equal
+         # trueWeightMatch is equal
          trueSpliceAlign, trueWeightMatch = computeSpliceAlign(dna, exons)
 
+         #print d.limits
+         #print d.penalties
+         #print a.limits
+         #print a.penalties
+         #print h.limits
+         #print h.penalties
+
+         ####################### checked above values ##########################################
+      
          # Calculate
          trueWeightDon, trueWeightAcc, trueWeightIntron = computeSpliceWeights(d, a, h, trueSpliceAlign, don_supp, acc_supp)
 
+         #print trueWeightDon.T
+         #print trueWeightAcc.T
+         #print trueWeightIntron.T
+
+         pdb.set_trace()
+
          # Reshape currentW param 
          currentW = zeros((numFeatures,1))
-         currentW[0:numDonorSupportPoints,1] = trueWeigthDon[:]
-         currentW[0:numAcceptorSupportPoints,1] = trueWeigthAcc[:]
-         currentW[0:numIntronLengthSupportPoints,1] = trueWeigthIntron[:]
-         currentW[0:numDonorSupportPoints,1] = trueWeigthMatch[:]
-
-         #currentPhi = d.penalties a h mmatrix
+      
+         currentW[0:numDonSuppPoints,0] = trueWeightDon[:,0]
+         currentW[numDonSuppPoints:numDonSuppPoints+numAccSuppPoints,0] = trueWeightAcc[:,0]
+         currentW[numDonSuppPoints+numAccSuppPoints:numDonSuppPoints+numAccSuppPoints+numLengthSuppPoints,0] = trueWeightIntron[:,0]
+         currentW[numDonSuppPoints+numAccSuppPoints+numLengthSuppPoints:numDonSuppPoints+numAccSuppPoints+numLengthSuppPoints+sizeMMatrix,0] = trueWeightMatch[:,0]
+         currentW[numDonSuppPoints+numAccSuppPoints+numLengthSuppPoints+sizeMMatrix:numDonSuppPoints+numAccSuppPoints+numLengthSuppPoints+sizeMMatrix+numQualSuppPoints,0] = qualityMatrix[:,0]
+
+         currentPhi = zeros((numFeatures,1))
+         currentPhi[0:30]     = d.penalties[:]
+         currentPhi[30:60]    = a.penalties[:]
+         currentPhi[60:90]    = h.penalties[:]
+         currentPhi[90:126]   = mmatrix[:]
+         currentPhi[126:]   = qualityMatrix[:]
 
          # Calculate w'phi(x,y) the total score of the alignment
-         #alignmentScore = currentW.T * currentPhi
+         alignmentScore = currentW.T * currentPhi
 
          #
          # Calculate wrong alignments
          #
+
+         # Compute donor, acceptor with penalty_lookup_new
+         # returns two double lists
+         donor, acceptor = compute_donacc(don_supp, acc_supp, d, a)
+       
+         #myalign wants the acceptor site on the g of the ag
+         #acceptor = [acceptor(2:end) -Inf] ;
+
          nr_paths = 2
          dna = 'acgtagct'
          dna_len = len(dna)
@@ -191,7 +233,7 @@ def run():
          currentAlignment = QPalmaDP.Alignment()
 
          qualityMat = QPalmaDP.createDoubleArrayFromList(qualityMatrix)
-         currentAlignment.setQualityMatrix(qualityMat,numQualitySupportPoints)
+         currentAlignment.setQualityMatrix(qualityMat,numQualSuppPoints)
          ps = h.convert2SWIG()
 
          currentAlignment.myalign( nr_paths, dna, dna_len,\
@@ -200,21 +242,10 @@ def run():
 
          print 'after myalign call...'
 
-         # # Compute donor, acceptor with penalty_lookup_new
-         # # returns two double lists
-         # [donor, acceptor] = compute_donacc(don_supp, acc_supp, d, a) ;
-       
-         # #myalign wants the acceptor site on the g of the ag
-         # acceptor = [acceptor(2:end) -Inf] ;
-         # 
-         # # call myalign    
-         # [SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest] = ...
-         #   myalign_local(num_path(id), dna, est, {h}, mmatrix, donor, acceptor, ...
-         #                 remove_duplicate_scores, print_matrix);
-       
          #  SpliceAlign = double(SpliceAlign') ; %column
          #  weightMatch = double(weightMatch') ;
 
+
       iteration_nr += 1
       break
 
@@ -224,21 +255,6 @@ def run():
        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        % Wrong Alignments
        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
-       % Compute donor, acceptor with penalty_lookup_new
-       [donor, acceptor] = compute_donacc(don_supp, acc_supp, d, a) ;
-       
-       %myalign wants the acceptor site on the g of the ag
-       acceptor = [acceptor(2:end) -Inf] ;
-         
-       %call myalign    
-       [SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest] = ...
-           myalign_local(num_path(id), dna, est, {h}, mmatrix, donor, acceptor, ...
-                         remove_duplicate_scores, print_matrix);
-       
-       %return values (are int32, have to be double later cause we compute scores
-       SpliceAlign = double(SpliceAlign') ; %column
-       weightMatch = double(weightMatch') ;
-
        %test
        for pfadNo = 1:num_path(id)
          assert(sum(weightMatch(pfadNo,7:end)) == sum(SpliceAlign(pfadNo,:)==0)) ;
@@ -329,7 +345,6 @@ def run():
          sum_xis +=  elem
      
    """
-
    print 'Training completed'
 
 if __name__ == '__main__':
index 1402e0d..761d7ff 100644 (file)
@@ -8,26 +8,32 @@ import pdb
 
 def linspace(a,b,n):
    intervalLength = b-a
-   stepSize = intervalLength / n
+   stepSize = 1.0*intervalLength / (n-1)
    
    interval = [0]*n
-   for i in range(n):
-      interval[i] = a+i*stepSize
+   interval[0] = a
+   interval[-1] = b
+   for i in range(1,n-1):
+      interval[i] = a+(i*stepSize)
 
    return interval
 
-  
 def logspace(a,b,n):
    interval = [0]*n
-   intervalSize = b-a
+   begin = 10.0**a
+   end = 10.0**b
+   intervalSize = 1.0*(b-a)/(n-1)
+   interval[0] = begin
+   interval[-1] = end
 
-   interval[n-1] = b
-
-   for i in range(n-2,0,-1):
-      interval[i] = interval[i+1] / math.e
+   for i in range(1,n-1):
+      interval[i] = 10**(a+i*intervalSize)
 
    return interval
 
+def log10(x):
+   return math.log(x)/math.log(10)
+
 class Plf: #means piecewise linear function
 
    def __init_(self):
@@ -99,7 +105,6 @@ def set_param_palma(param, train_with_intronlengthinformation,\
       min_svm_score=-5
       max_svm_score=5
 
-   
    h = Plf()
    d = Plf()
    a = Plf()
@@ -108,7 +113,7 @@ def set_param_palma(param, train_with_intronlengthinformation,\
    # Gapfunktion
    ####################
    h.limits    = logspace(math.log(min_intron_len,10),math.log(max_intron_len,10),30) 
-   h.penalties = param[1:30] 
+   h.penalties = param[0:30] 
    #h.transform = '+1' 
    h.transform = '' 
    h.name      = 'h' 
@@ -123,7 +128,7 @@ def set_param_palma(param, train_with_intronlengthinformation,\
    # Donorfunktion
    ####################
    d.limits    = linspace(min_svm_score,max_svm_score,30) 
-   d.penalties = param[31:60]
+   d.penalties = param[30:60]
    #d.transform = '+1' 
    d.transform = '' 
    d.name      = 'd' 
@@ -138,7 +143,7 @@ def set_param_palma(param, train_with_intronlengthinformation,\
    # Acceptorfunktion
    ####################
    a.limits    = linspace(min_svm_score,max_svm_score,30) 
-   a.penalties = param[61:90]
+   a.penalties = param[60:90]
    #a.transform = '+1' 
    a.transform = '' 
    a.name      = 'a' 
@@ -156,3 +161,12 @@ def set_param_palma(param, train_with_intronlengthinformation,\
    mmatrix.reshape(6,6) 
 
    return h,d,a,mmatrix
+
+if __name__ == '__main__':
+   #min_intron_len=20
+   #max_intron_len=1000
+   #print logspace(math.log(min_intron_len,10),math.log(max_intron_len,10),30) 
+
+   min_svm_score=-5
+   max_svm_score=5
+   print linspace(min_svm_score,max_svm_score,30) 
index a26989b..84bc4d4 100644 (file)
@@ -1,52 +1,80 @@
-load confirmed_sequences.mat;
+%
+% This script compares two alignments based on their EST overlaps
+%
+% The ground_truth and testrun
+%
+load /fml/ag-raetsch/home/fabio/tmp/zebrafish/confirmed_sequences.mat;
 testrun = genes;
 load /fml/ag-raetsch/share/projects/altsplicedata/zebrafish/confirmed_sequences.mat;
 ground_truth = genes;
 clear genes;
 
+fh = fopen('overlapping.pos','w+')
 for i = 1:length(ground_truth)
-
-   currentTru = ground_truth(i);
-   currentTruExons = currentTru.exons{1};
+   currentEntry = ground_truth(i);
+   currentExons = currentEntry.exons;
+   assert (length(currentEntry.transcripts) == length(currentEntry.exons));
+   numberOfEsts = length(currentEntry.transcripts);
 
    for j = 1:length(testrun)
       currentPred = testrun(j);
 
-      if strcmp(currentTru.chr,currentPred.chr) && strcmp(currentTru.strands,currentPred.strands)
+      if strcmp(currentEntry.chr,currentPred.chr) && currentEntry.is_alt_spliced
          continue;
       end
 
-      truStart = currentTru.start;
-      truStop  = currentTru.stop;
+      start = currentEntry.start;
+      stop  = currentEntry.stop;
       predStart = currentPred.start;
       predStop = currentPred.stop;
 
-      if predStop < truStart || predStart > truStop
+      % if both entries are not overlapping at all
+      if predStop < start || predStart > stop
          continue;
       end
 
-      currentPredExons = currentPred.exons{1};
-
-      rows = size(currentTruExons,1);
-      for ii = 1:rows
-         exonStart = currentTruExons(ii,1);
-         exonStop = currentTruExons(ii,2);
-         for jj = 1:size(currentPredExons,1)
-            predExonStart = currentPredExons(jj,1);
-            predExonStop = currentPredExons(jj,2);
-
-            if exonStart >= predExonStart && exonStart <= predExonStop
-               disp('Overlapping');
-            % end of exonth is nested inside predExoniction
-            elseif exonStop >= predExonStart && exonStop <= predExonStop
-               disp('Overlapping');
-            % predExoniction is nested inside exonth
-            elseif exonStart <= predExonStart && predExonStop <= exonStop
-               disp('Overlapping');
-            else
-               d=1;
+      currentPredExons = currentPred.exons;
+      numberOfPredEsts = length(currentPredExons);
+
+      % Loop over all ESTs in ground truth and testrun
+      for estIdx = 1:numberOfEsts
+         %disp(sprintf('estIdx %i ',estIdx));
+         numberOfExons = size(currentExons{estIdx},1);
+
+         for exonIdx = 1:numberOfExons
+            %disp(sprintf('exonIdx %i ',exonIdx));
+            exonStart = currentExons{estIdx}(exonIdx,1);
+            exonStop  = currentExons{estIdx}(exonIdx,2);
+
+            for predEstIdx = 1:numberOfPredEsts
+            %disp(sprintf('predEstIdx %i ',predEstIdx));
+            numberOfPredExons = size(currentPredExons{predEstIdx},1);
+            currentESTName = currentPred.transcripts{predEstIdx};
+
+               for predExonIdx = 1:numberOfPredExons
+                  %%disp(sprintf('predExonIdx %i ',predExonIdx));
+                  predExonStart = currentPredExons{predEstIdx}(predExonIdx,1);
+                  predExonStop  = currentPredExons{predEstIdx}(predExonIdx,2);
+
+                  %disp('\n');
+                  %%disp(sprintf('%i %i %i %i %i %i\n',i,j,estIdx,predEstIdx,exonIdx,predExonIdx));
+
+                  if exonStart >= predExonStart && exonStart <= predExonStop
+                     %%disp('Overlapping');
+                     fprintf(fh,sprintf('%s before %d\n',currentESTName,exonStart-predExonStart))
+                  % end of exonth is nested inside predExoniction
+                  elseif exonStop >= predExonStart && exonStop <= predExonStop
+                     %%disp('Overlapping');
+                     fprintf(fh,sprintf('%s after %d\n',currentESTName,predExonStop-exonStop))
+                  % predExoniction is nested inside exonth
+                  elseif exonStart <= predExonStart && predExonStop <= exonStop
+                     %disp('Overlapping');
+                     fprintf('%s %d %d',fh)
+                  else
+                     d=1;
+                  end
+               end
             end
-            
          end
       end
    end