+ added palma_utils file
authorFabio <fabio@congo.fml.local>
Thu, 2 Oct 2008 16:20:05 +0000 (18:20 +0200)
committerFabio <fabio@congo.fml.local>
Thu, 2 Oct 2008 16:20:05 +0000 (18:20 +0200)
+ added some license texts

qpalma/palma_utils.py [new file with mode: 0644]
qpalma/utils.py
scripts/PipelineHeuristic.py

diff --git a/qpalma/palma_utils.py b/qpalma/palma_utils.py
new file mode 100644 (file)
index 0000000..a8ade88
--- /dev/null
@@ -0,0 +1,581 @@
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# Written (W) 2006-2007 Uta Schulze, Gunnar Raetsch
+# Copyright (C) 2006-2007 Max-Planck-Society
+# 
+
+def set_param(model):
+    import numpy
+
+    class plf: #means piecewise linear function
+      len = 0
+      limits = []
+      penalties = []
+      transform = ''
+      name = ''
+      max_len = 0
+      min_len = 0
+
+    h = plf()
+    h.len = int(model.intron_len_bins) ;
+    h.limits = model.intron_len_limits ;
+    h.penalties = model.intron_len_penalties ;
+    h.name = 'h'
+    h.max_len   = int(model.intron_len_max)
+    h.min_len   = int(model.intron_len_min)
+    h.transform = model.intron_len_transform
+
+    d = plf()
+    d.len = int(model.donor_bins)
+    d.limits = model.donor_limits
+    d.penalties = model.donor_penalties 
+    d.name = 'd'
+    d.max_len   = 100
+    d.min_len   = -100
+
+    a = plf()
+    a.len = int(model.acceptor_bins)
+    a.limits = model.acceptor_limits 
+    a.penalties = model.acceptor_penalties 
+    a.name = 'a'
+    a.max_len   = 100
+    a.min_len   = -100
+
+    mmatrix = model.substitution_matrix 
+    return h,d,a,mmatrix
+#<------------------------------------------------------------------------------>
+
+
+#<------------------------------------------------------------------------------>
+# gives back y-value acc, to "support"-value (piecewise linear function)
+#<------------------------------------------------------------------------------>
+def penalty_lookup(plf, supp_value):
+       import numpy
+       limits    = plf.limits
+       penalties = plf.penalties
+       transform = plf.transform
+
+       if transform == '':
+               supp_value = supp_value
+       elif transform == 'log':
+               print 'log'
+               supp_value = numpy.log(supp_value)
+       elif transform == 'log(+3)':
+               print 'log(+3)'
+               supp_value = numpy.log(supp_value+3)
+       elif transform == 'log(+1)':
+               print 'log(+1)'
+               supp_value = numpy.log(supp_value+1)
+       elif transform == '(+3)':
+               print '(+3)'
+               supp_value = supp_value+3
+       elif transform == 'mod3':
+               print 'mod3'
+               supp_value = numpy.mod(supp_value,3)
+       else:
+               raise NameError, 'unknown penalty transform'
+
+       #print 'supp_value = %i\n' %supp_value
+       
+       if supp_value <= limits[0]:
+               score = penalties[0]
+               #print 'supp_value <= limits[0]'
+       elif supp_value >= limits[plf.len-1]:
+               score = penalties[plf.len-1]
+               #print 'supp_value >= limits[len(limits)-1]'
+       else:
+               for cnt in range(plf.len):
+                       if supp_value <= limits[cnt]:
+                               upper_idx = cnt
+                               break
+               lower_idx = upper_idx-1
+               #print '%1.3f <= %1.3f <= %1.3f\n' %(limits[lower_idx], supp_value, limits[upper_idx])
+               score = ((supp_value-limits[lower_idx])*penalties[upper_idx] + (limits[upper_idx]-supp_value)*penalties[lower_idx])/(limits[upper_idx]-limits[lower_idx])     
+
+       return score
+#<------------------------------------------------------------------------------>
+
+
+#<------------------------------------------------------------------------------>
+# counts matches, mismatches, gaps on DNA (means bases inserted in query, qGapb)
+# and gaps on EST (means baes inserted in transcript, tGapb)
+#
+#   matchmatrix (columnwise)
+#     - A C G T N (dna)
+#   - x x x x x x
+#   A x x x x x x      
+#   C x x x x x x
+#   G x x x x x x
+#   T x x x x x x
+#   N x x x x x x
+# (est)
+#<------------------------------------------------------------------------------>
+def evaluate_mmatrix(mmatrix):
+    import numpy
+    #7: [a,a], 14:[c,c], 21:[g,g], 28:[t,t], 35:[n.n]
+    match = mmatrix[7] + mmatrix[14] + mmatrix[21] + mmatrix[28] + mmatrix[35]
+    qGapb = numpy.sum(mmatrix[1:6]) #gaps on DNA
+    tGapb = mmatrix[6] + mmatrix[12] + mmatrix[18] + mmatrix[24] + mmatrix[30] #gaps on EST
+    mism  = numpy.sum(mmatrix) - (match + qGapb + tGapb)
+
+    if 0:
+       print 'matches: %d' %match
+       print 'mismatches: %d' %mism
+       print 'qGapb (gaps on DNA): %d' %qGapb
+       print 'tGapb (gaps on EST): %d' %tGapb
+    
+    return match, mism, qGapb, tGapb
+#<------------------------------------------------------------------------------>
+
+
+#<------------------------------------------------------------------------------>
+# qStart, qEnd, tStart, tEnd, blocks, qStarts, tStarts
+#<------------------------------------------------------------------------------>
+def get_splice_info(splice_align, est_align):
+    import sys
+    #problem: because of the dangling ends, alignment can start with an intron
+    
+    #print splice_align
+    #print est_align   
+
+    dna_len = len(splice_align)
+    est_len = len(est_align)
+    qStart = 1 #(default)
+    qEnd   = est_len #(default)
+    tStart = 1 #(default)
+    tEnd   = dna_len #(default)
+    qStarts = []
+    qEnds   = []
+    qExonSizes = []
+    tStarts = []
+    tEnds   = []
+    tExonSizes = []
+
+    #<---------------------------------------------------------->
+    #query: dangling start
+    if est_align[0]==4:
+       for i in range(est_len):
+           if est_align[i]!=4:
+               qStart = i+1 #+1 bc index starts with zero
+               break
+
+    #query: dangling end
+    if est_align[est_len-1]==4:
+       list_idx = range(est_len)
+       list_idx.reverse()
+       for i in list_idx:
+           if est_align[i]!=4:
+               qEnd = i+1 #last "4", +1 bc index starts with zero
+               break
+    #qStarts: list of exon starting points on EST
+    #qEnds:   list of exon ending points on EST
+    #(2 in est_align means: first nucleotideof new exon)
+   
+    index_count = 0
+    exon_label = est_align[qStart-1]
+    qStarts.append(qStart)
+    for i in range(qStart-1,qEnd):
+       if (est_align[i]==1) and (exon_label==2): #new exon labeled "1"
+           exon_label = 1
+           qStarts.append(i+1) #+1 bc index starts with zero, start of this exon saved
+           qEnds.append(i-1+1) #end of last exon labeled "2" saved
+           qExonSizes.append(qEnds[index_count] - qStarts[index_count] +1) #length of  last exon
+           index_count+=1
+       elif (est_align[i]==2) and (exon_label==1): #new exon labeled "2"
+           exon_label = 2
+           qStarts.append(i+1) #+1 bc index starts with zero, start of this exon saved
+           qEnds.append(i-1+1) #end of last exon labeled "2" saved
+           qExonSizes.append(qEnds[index_count] - qStarts[index_count] +1) #length of  last exon
+           index_count+=1
+       if (i == qEnd-1): #end of last exon
+           qEnds.append(i+1) #end of last exon saved
+           qExonSizes.append(qEnds[index_count] - qStarts[index_count] +1) #length of  last exon          
+                  
+    assert(len(qStarts)==len(qEnds))
+    
+    if 0:
+       print 'qStart: %d' %qStart
+       print 'qEnd:   %d' %qEnd
+       print qStarts
+       print qEnds
+       print qExonSizes
+    #<---------------------------------------------------------->
+    
+    #<---------------------------------------------------------->          
+    #transcript: dangling start
+    if splice_align[0] == 4:
+       for i in range(dna_len):
+           if splice_align[i]!=4:
+               tStart = i+1 #+1 bc index starts with zero
+               break
+    #if splice_align[tStart-1]==1: #alignment starts with intron
+       #print 'alignment starts with intron'
+
+    #transcript: dangling end
+    if splice_align[dna_len-1]==4:
+       list_idx = range(dna_len)
+       list_idx.reverse()
+       for i in list_idx:
+           if splice_align[i]!=4:
+               tEnd = i+1 #+1 bc index starts with zero
+               break   
+    #if splice_align[tEnd-1]==2: #alignment ends with intron
+       #print 'alignment ends with intron'
+
+    #tStarts: list of exon starting points on DNA
+    #tEnds:   list of exon ending points on DNA
+   
+    index_count = 0
+    act_state = 3 #3 means intron, 0 means exon
+    for i in range(tStart-1,tEnd):
+       if (splice_align[i]==0) and (act_state==3): #first base of exon
+           tStarts.append(i+1) #+1 bc index starts with zero
+           act_state=0 #exon
+       elif (splice_align[i]==1) and (act_state==0): #exon ended one base ago
+           tEnds.append(i-1+1) #+1 bc index starts with zero
+           act_state=3 #intron
+           tExonSizes.append(tEnds[index_count] - tStarts[index_count] +1) #length
+           index_count+=1
+       if (i==tEnd-1) and (act_state==0): #end of alignment reached and still in exon
+           tEnds.append(i+1) #+1 bc index starts with zero
+           tExonSizes.append(tEnds[index_count] - tStarts[index_count] +1) #length
+
+    #<---------------------------------------------------------->
+
+           
+
+    if (len(tStarts)!=len(tEnds)):
+       print len(tStarts)
+       print len(tEnds)
+       print 'tStart: %d' %tStart
+       print 'tEnd:   %d' %tEnd 
+       print tStarts
+       print tEnds
+       sys.exit(2)
+
+    #print 'exons on query: %d, exons on target: %d' %(len(qStarts),len(tStarts))
+    num_exons = len(tStarts)
+    #print num_exons
+    #print len(qStarts)
+
+    if 0:
+       print 'tStart: %d' %tStart
+       print 'tEnd:   %d' %tEnd 
+       print tStarts
+       print tEnds
+       print tExonSizes
+
+   # assert(num_exons==len(qStarts))
+
+    return qStart, qEnd, tStart, tEnd, num_exons, qExonSizes, qStarts, qEnds, tExonSizes, tStarts, tEnds
+#<------------------------------------------------------------------------------>
+
+""" read fasta as dictionary """
+def read_fasta(f):
+       import sys
+       fasta=dict()
+       for s in f.readlines():
+               if s.startswith('>'):
+                       key=s[1:-1]
+                       print key
+                       if fasta.has_key(key):
+                               sys.stderr.write("ERROR: duplicated sequence name '%s'\n" %(key))
+                               sys.exit(2)
+                       fasta[key]=""
+               else:
+                       fasta[key]+=s[:-1]
+
+       return fasta
+
+#<------------------------------------------------------------------------------>
+#
+# DNA and EST with gaps and est with intron
+# len(dna) == len(est)
+# identity: list of length exon_numbers: #matches / length(exon_with_gaps)
+#<------------------------------------------------------------------------------>
+def comp_identity(DNA, EST, strand, qStart, tStart, translation, comparison_char):
+    def set_idx(i,qNum,tNum):
+       qIDX[i] = qNum
+       tIDX[i] = tNum
+       return qIDX, tIDX
+    
+    gap_char = translation[0] #e.g. "-"
+    in_char = translation[6] #e.g. "*", intron char
+    equ_char = comparison_char[0] #match char e.g. "|"
+    comp_in_char = comparison_char[1] #intron char when comparing 
+    comparison = [' '] *len(EST)
+    qIDX = [-1] *len(EST)
+    tIDX = [-1] *len(DNA)
+    exon_idx = 0
+    matches = []
+    gaps = []
+    exon_length = []
+    identity = []
+    exonQuality = [] # matches - 2*gaps
+    ssQuality = []
+    state = 3 #intron:3, exon:0
+    EST_started = 0
+    
+    for i in range(len(DNA)):
+      if (i>0 and i<len(DNA)-1):
+       if (EST[i] == in_char): #intron
+        (qIDX, tIDX) = set_idx(i,qIDX[i-1],tIDX[i-1]+1)
+        comparison[i] = comp_in_char
+        if (state==0): #exon finished
+            identity.append(float(matches[exon_idx])/float(exon_length[exon_idx])) #save identity for finished exon
+            exonQuality.append(float(matches[exon_idx]-2*gaps[exon_idx])/float(exon_length[exon_idx])) #save identity for finished exon
+            exon_idx+=1 #next exon starts sooner or later
+            state = 3 #now intron
+            last_ssQuality = 0.0 ;
+            last_ssLen = 0 ;
+            for k in range(5):
+                if i-k-1<0: break 
+                last_ssLen+=1
+                if DNA[i-k-1]==EST[i-k-1]: last_ssQuality+=1.0 
+                else:
+                    if (DNA[i-k-1]=='N' and EST[i-k-1]!=gap_char) or (DNA[i-k-1]!=gap_char and EST[i-k-1]=='N'): last_ssQuality+=0.5
+                #print k,DNA[i-k-1],EST[i-k-1],last_ssQuality
+       else: #exon
+        if (state==0): #still in same exon
+            if EST_started and DNA[i]!=gap_char: # only count DNA length
+                exon_length[exon_idx] += 1 #one bp added
+            if EST[i]!=gap_char: EST_started=1 ;
+            #print i,EST[i],EST_started,exon_length[exon_idx]
+            if (DNA[i]==EST[i]):
+                matches[exon_idx]+=1 #one match added
+            else:
+                if (DNA[i]=='N' and EST[i]!=gap_char) or (EST[i]=='N' and DNA[i]!=gap_char): matches[exon_idx]+=0.5
+        else: #new exon
+            for k in range(5):
+                if i+k>=len(DNA) or i+k>len(EST): break 
+                last_ssLen+=1 ;
+                if DNA[i+k]==EST[i+k]: last_ssQuality+=1.0 ;
+                else:
+                    if (DNA[i+k]=='N' and EST[i+k]!=gap_char) or (DNA[i+k]!=gap_char and EST[i+k]=='N'): last_ssQuality+=0.5
+                #print k,DNA[i+k],EST[i+k],last_ssQuality
+            ssQuality.append(last_ssQuality/last_ssLen) 
+            if DNA[i]!=gap_char: exon_length.append(1)
+            else: exon_length.append(0)
+            state = 0 #now exon
+            if (DNA[i]==EST[i]):
+             gaps.append(0)
+             matches.append(1)
+            else:
+             if (DNA[i]=='N' and EST[i]!=gap_char) or (EST[i]=='N' and DNA[i]!=gap_char): matches.append(0.5); gaps.append(0)
+             else: matches.append(0); gaps.append(1) 
+
+        if (DNA[i]==EST[i]) and (DNA[i]!='-'): #match
+            (qIDX, tIDX) = set_idx(i,qIDX[i-1]+1,tIDX[i-1]+1)
+            comparison[i] = equ_char    
+        elif (DNA[i]==EST[i]) and (DNA[i]=='-'): #strange match
+            (qIDX, tIDX) = set_idx(i,qIDX[i-1],tIDX[i-1])
+        elif (EST[i]==gap_char): #gap on est
+            (qIDX, tIDX) = set_idx(i,qIDX[i-1],tIDX[i-1]+1)
+        elif (DNA[i]==gap_char): #gap on dna
+            (qIDX, tIDX) = set_idx(i,qIDX[i-1]+1,tIDX[i-1])
+        else: #mismatch
+            (qIDX, tIDX) = set_idx(i,qIDX[i-1]+1,tIDX[i-1]+1)
+
+      elif (i==0):
+       if (EST[i] == in_char): #alignment starts with intron
+        (qIDX, tIDX) = set_idx(i,0,1)
+        comparison[i] = comp_in_char
+       else: #alignment starts with exon
+        state = 0 #go into exon_state
+        if DNA[i]!=gap_char: exon_length.append(1)
+        else: exon_length.append(0)
+        if (DNA[i]==EST[i]) and (DNA[i]!='-'): #match
+            #(qIDX, tIDX) = set_idx(i,1,1)
+            EST_started = 1 
+            (qIDX, tIDX) = set_idx(i,qStart,tStart)
+            matches.append(1)
+            gaps.append(0)
+            comparison[i] = equ_char
+        elif (DNA[i]==EST[i]) and (DNA[i]=='-'): #strange match
+            (qIDX, tIDX) = set_idx(i,qStart,tStart)
+            matches.append(0)
+            gaps.append(1)
+        elif (EST[i]==gap_char): #gap on est
+            #(qIDX, tIDX) = set_idx(i,0,1)
+            (qIDX, tIDX) = set_idx(i,qStart,tStart)
+            matches.append(0)
+            gaps.append(1)
+        elif (DNA[i]==gap_char): #gap on dna
+            #(qIDX, tIDX) = set_idx(i,1,0)
+            (qIDX, tIDX) = set_idx(i,qStart,tStart)
+            matches.append(0)
+            gaps.append(1)
+        else: #mismatch
+            #(qIDX, tIDX) = set_idx(i,1,1)
+            (qIDX, tIDX) = set_idx(i,qStart,tStart)
+            gaps.append(0)
+            if (DNA[i]=='N' or EST[i]=='N'): matches.append(0.5)
+            else: matches.append(0.5)
+      elif (i==len(DNA)-1):
+       if (EST[i] == in_char): #alignment ends with intron, with exons everything ok
+        assert(state==3) #ok, because intron longer than 4
+        comparison[i] = comp_in_char
+        (qIDX, tIDX) = set_idx(i,qIDX[i-1],tIDX[i-1]+1)
+       else: #alignment ends with exon, exon finished
+        if (state==0): #finish exon
+           if DNA[i]!=gap_char: exon_length[exon_idx] += 1 #one bp added
+           else: exon_length[exon_idx] += 1 #one bp added
+           if (DNA[i]==EST[i]) and (DNA[i]!=gap_char): #match
+            matches[exon_idx]+=1 #one match added
+            comparison[i] = equ_char
+            (qIDX, tIDX) = set_idx(i,qIDX[i-1]+1,tIDX[i-1]+1)
+           elif (DNA[i]==EST[i]) and (DNA[i]==gap_char): #strange match
+            (qIDX, tIDX) = set_idx(i,qIDX[i-1],tIDX[i-1])
+           elif (EST[i]==gap_char): #gap on est
+            (qIDX, tIDX) = set_idx(i,qIDX[i-1],tIDX[i-1]+1)
+           elif (DNA[i]==gap_char): #gap on dna
+            (qIDX, tIDX) = set_idx(i,qIDX[i-1]+1,tIDX[i-1])
+           else: #mismatch
+            if (DNA[i]=='N' or EST[i]=='N'): matches[exon_idx]+=0.5
+            (qIDX, tIDX) = set_idx(i,qIDX[i-1]+1,tIDX[i-1]+1)
+           identity.append(float(matches[exon_idx])/float(exon_length[exon_idx])) #save identity for finished exon
+           exonQuality.append(float(matches[exon_idx]-2*gaps[exon_idx])/float(exon_length[exon_idx])) #save identity for finished exon
+        else: #last exon has length 1
+           if DNA[i]!=gap_char: exon_length.append(1)
+           else: exon_length.append(1)
+           if (DNA[i]==EST[i]): #match
+            matches.append(1) #one match added
+            gaps.append(0) 
+            comparison[i] = equ_char
+            (qIDX, tIDX) = set_idx(i,qIDX[i-1]+1,tIDX[i-1]+1)
+           elif (EST[i]==gap_char): #gap on est
+            gaps.append(1) 
+            matches.append(0)
+            (qIDX, tIDX) = set_idx(i,qIDX[i-1],tIDX[i-1]+1)
+           elif (DNA[i]==gap_char): #gap on dna
+            gaps.append(1) 
+            matches.append(0)
+            (qIDX, tIDX) = set_idx(i,qIDX[i-1]+1,tIDX[i-1])
+           else: #mismatch
+            gaps.append(0) 
+            if (DNA[i]=='N' or EST[i]=='N'): matches.append(0.5)
+            else: matches.append(0)
+            (qIDX, tIDX) = set_idx(i,qIDX[i-1]+1,tIDX[i-1]+1)
+           identity.append(float(matches[exon_idx])/float(exon_length[exon_idx])) #save identity for finished exon
+           exonQuality.append(float(matches[exon_idx]-2*gaps[exon_idx])/float(exon_length[exon_idx])) #save identity for finished exon
+      else:
+        print 'this "else" should not happen'
+
+    comparison = reduce(lambda x,y: x+y, comparison)
+    #print qIDX
+    #print tIDX
+    #print exon_length
+    #print ssQuality, exonQuality, identity
+
+    return exon_length, identity, ssQuality, exonQuality, comparison, qIDX, tIDX
+#<------------------------------------------------------------------------------>
+
+
+#<------------------------------------------------------------------------------>
+# 
+#<------------------------------------------------------------------------------>
+def reverse_complement(DNA):
+    import string
+    import sys
+    dict = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A', 'N': 'N'}
+    
+    DNA = DNA.upper()
+    reverse_DNA = ''
+    cnt_list = range(len(DNA))
+    cnt_list.reverse()
+    for i in cnt_list:
+      if not dict.has_key(DNA[i]):
+        print 'unknown letter', DNA[i], '-> translating to N'
+        reverse_DNA = reverse_DNA + 'N' ;
+      else:
+        reverse_DNA = reverse_DNA + dict[DNA[i]]
+    return reverse_DNA
+#<------------------------------------------------------------------------------>
+
+
+#<------------------------------------------------------------------------------>
+#
+#<------------------------------------------------------------------------------>
+def parse_wublast(output, e_value_bound):
+    import numpy
+    import sys
+    
+    strand_matrix='- +'
+
+    class wublast_info:
+        score = []
+        q_name = []
+        t_name = []
+        q_strand = []
+        t_strand = []
+        q_start = []
+        q_end = []
+        t_start = []
+        t_end = []
+
+    est_entry = 0
+    still_in_group = 0
+    
+    est_wbi = wublast_info()
+    for line in output:
+        fields = line.strip().split('\t')
+        #print line, still_in_group
+        if float(fields[2]) <= e_value_bound:
+            if len(est_wbi.q_name)>0:
+                flag = (fields[0] == est_wbi.q_name[len(est_wbi.t_name)-1]) and (fields[1] == est_wbi.t_name[len(est_wbi.t_name)-1]) and (int(fields[3]) == group_size)
+            else:
+                flag = True 
+
+            if (still_in_group == 0) or (not flag):
+                if (not still_in_group == 0):
+                    sys.stderr.write('blastn output format problem near line:\n'+line)
+
+                # start a new group
+                est_wbi.score.append(float(fields[4]))
+                est_wbi.q_name.append(fields[0])
+                est_wbi.t_name.append(fields[1])
+                est_wbi.q_strand.append(strand_matrix[int(fields[16])+1])
+                est_wbi.t_strand.append(strand_matrix[int(fields[19])+1])
+                if strand_matrix[int(fields[16])+1] == '+':
+                    est_wbi.q_start.append(int(fields[17]))
+                    est_wbi.q_end.append(int(fields[18]))
+                else:
+                    est_wbi.q_start.append(int(fields[18]))
+                    est_wbi.q_end.append(int(fields[17]))
+                est_wbi.t_start.append(int(fields[20]))
+                est_wbi.t_end.append(int(fields[21]))
+                still_in_group = int(fields[3])-1
+                #print 'still_in_group', still_in_group
+                group_size = int(fields[3])
+            else:
+                # extend a group
+                assert(fields[0] == est_wbi.q_name[len(est_wbi.t_name)-1]) 
+                assert(fields[1] == est_wbi.t_name[len(est_wbi.t_name)-1]) 
+                assert(int(fields[3]) == group_size) 
+                est_wbi.score[len(est_wbi.score)-1] += float(fields[4])
+                if strand_matrix[int(fields[16])+1] == '+':
+                    est_wbi.q_start[len(est_wbi.q_start)-1] = numpy.minimum(est_wbi.q_start[len(est_wbi.q_start)-1],int(fields[17]))
+                    est_wbi.q_end[len(est_wbi.q_end)-1] =numpy.maximum(est_wbi.q_end[len(est_wbi.q_end)-1],int(fields[18]))
+                else:
+                    est_wbi.q_start[len(est_wbi.q_start)-1] =numpy.minimum(est_wbi.q_start[len(est_wbi.q_start)-1],int(fields[18]))
+                    est_wbi.q_end[len(est_wbi.q_end)-1] =numpy.maximum(est_wbi.q_end[len(est_wbi.q_end)-1],int(fields[17]))
+                est_wbi.t_start[len(est_wbi.t_start)-1] =numpy.minimum(est_wbi.t_start[len(est_wbi.t_start)-1],int(fields[20]))
+                est_wbi.t_end[len(est_wbi.t_end)-1] =numpy.maximum(est_wbi.t_end[len(est_wbi.t_end)-1],int(fields[21]))
+                still_in_group -= 1
+
+    if 0:
+        print est_wbi.q_name
+        print est_wbi.t_name
+        print est_wbi.q_strand
+        print est_wbi.t_strand
+        print est_wbi.q_start
+        print est_wbi.q_end
+        print est_wbi.t_start
+        print est_wbi.t_end
+
+    return est_wbi
+#<------------------------------------------------------------------------------>
index f80f7ad..ea3f52f 100644 (file)
@@ -1,5 +1,10 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# Written (W) 2008 Fabio De Bona
+# Copyright (C) 2008 Max-Planck-Society
 
 import pdb
 import sys
@@ -7,8 +12,7 @@ import os.path
 
 from numpy.matlib import mat,zeros,ones,inf
 
-import palma.palma_utils as pu
-from palma.output_formating import print_results
+import palma_utils as pu
 
 # some useful constants
 
index 47dae45..3fd1418 100644 (file)
@@ -1,6 +1,14 @@
 #!/usr/bin/env python 
 # -*- coding: utf-8 -*- 
 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# Written (W) 2008 Gunnar R├Ątsch, Fabio De Bona
+# Copyright (C) 2008 Max-Planck-Society
+
 import cPickle
 import sys
 import pdb