+ using correct interface now
authorfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Sun, 2 Dec 2007 20:11:22 +0000 (20:11 +0000)
committerfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Sun, 2 Dec 2007 20:11:22 +0000 (20:11 +0000)
+ added some assertions for range checks
+ added two new python scripts translated from the matlab version
TODO
- dimension and range checking for all other structures/array after set_param_palma
- decide between numpy and lists for smaller arrays

git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@6913 e1793c9e-67f9-0310-80fc-b846ff1f7b36

python/.qpalma.py.swp
python/compute_donacc.py [new file with mode: 0644]
python/penalty_lookup_new.py [new file with mode: 0644]
python/qpalma.py
python/set_param_palma.py

index 4f1759f..b87f9e8 100644 (file)
Binary files a/python/.qpalma.py.swp and b/python/.qpalma.py.swp differ
diff --git a/python/compute_donacc.py b/python/compute_donacc.py
new file mode 100644 (file)
index 0000000..b66dfd5
--- /dev/null
@@ -0,0 +1,23 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import math
+from numpy.matlib import zeros
+
+def compute_donacc(donor_supp, acceptor_supp, d, a):
+
+   assert(len(donor_supp)==len(acceptor_supp))
+  
+   donor = zeros(len(donor_supp))
+   acceptor= zeros(len(acceptor_supp))
+
+   for idx in range(len(donor_supp)):
+     if donor_supp[idx] == inf:
+       donor[idx] = donor_supp[idx]; 
+     else:
+       donor[idx] = penalty_lookup_new(d, donor_supp[idx])
+
+     if acceptor_supp[idx] == inf:
+       acceptor[idx] = acceptor_supp[idx]
+     else:
+       acceptor[idx] = penalty_lookup_new(a,acceptor_supp[idx])
diff --git a/python/penalty_lookup_new.py b/python/penalty_lookup_new.py
new file mode 100644 (file)
index 0000000..66df8ee
--- /dev/null
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import math
+
+def penalty_lookup_new(penalty_struct, value):
+
+   limits = penalty_struct.limits
+   penalties = penalty_struct.penalties
+
+   if penalty_struct.transform == 'log':
+      value = math.log(value)
+
+   elif penalty_struct.transform == 'log(+3)':
+      value = math.log(value+3)
+
+   elif penalty_struct.transform == 'log(+1)':
+      value = math.log(value+1)
+
+   elif penalty_struct.transform == '' or\
+        penalty_struct.transform == '(+3)':
+      value += 3
+         
+   elif penalty_struct.transform == 'mod3':
+     value = value%3
+
+   else:
+      assert False,'unknown penalty transform'
+
+   count = len([elem for elem in limits if elem <= value])
+
+   if count == 0:
+      pen = penalties[0]
+   elif count == length(limits):
+     pen=penalties[-1]
+   else:
+     pen = (penalties[count+1]*(value-limits[count]) + penalties[count]\
+         *(limits[count+1]-value)) / (limits[count+1]-limits[count])
index d3fb752..2c70418 100644 (file)
@@ -13,6 +13,9 @@ from computeSpliceAlign import *
 from computeSpliceWeights import *
 import QPalmaDP
 
+from penalty_lookup import *
+from compute_donacc import *
+
 """
 A training method for the QPalma project
 
@@ -56,42 +59,11 @@ class Param:
       self.deletion_prob   = self.prob/3 ;
       self.mutation_prob   = self.prob/3 ;
 
-
-#%basedir='/fml/ag-raetsch/share/projects/splicing/uta/' 
-#if ~exist('basedir','var')
-#  basedir='/fml/ag-raetsch/share/projects/palma/elegans/' ;
-#end
-#
-#%basedir='/fml/ag-raetsch/share/projects/palma/human/' ;
-
-
-class GenomeInfo:
-   """
-   GenomeInfo's init function replaces 
-   init_genome
-
-   """
-   
-   def __init__(self,gen_file):
-      """ load the info stored in genome.config """
-
-      self.est_fnames      = g_est_fnames
-      self.cdna_fnames     = g_cdna_fnames
-      self.flcdna_fnames=g_flcdna_fnames
-      self.contig_names=g_contig_names
-      self.flat_fnames=g_flat_fnames
-      self.fasta_fnames=g_fasta_fnames
-      self.annotation_fnames=g_annotation_fnames
-      self.alphabet=g_alphabet
-      self.basedir=g_basedir
-      self.max_intron_len = g_max_intron_len
-      self.min_intron_len = g_min_intron_len
-      self.merge_est_transcripts = g_merge_est_transcripts
-
-   
 def run():
    ARGS = Param()
 
+   logfh = open('qpalma.log','w+')
+
    gen_file= '%s/genome.config' % ARGS.basedir
 
    cmd = ['']*4
@@ -108,17 +80,16 @@ def run():
    ginfo = scipy.io.loadmat('genome_info.mat')
    genome_info = ginfo['genome_info']
 
-   print 'genome_info.basedir is %s\n'%genome_info.basedir
+   plog(logfh,'genome_info.basedir is %s\n'%genome_info.basedir)
 
    Sequences, Acceptors, Donors, Exons, Ests, Noises = paths_load_data('training',genome_info,ARGS)
 
-   #############################################################################################
-   # Constants 
-   #############################################################################################
-
    # number of training instances
    N = len(Sequences) 
-   print 'Number of training examples: %d\n'% N
+   assert N == len(Acceptors) and N == len(Acceptors) and N == len(Exons)\
+   and N == len(Ests), 'The Seq,Accept,Donor,.. arrays are of different lengths'
+
+   plog(logfh,'Number of training examples: %d\n'% N)
 
    random_N = 100 ; # number of constraints to generate per iteration
    iteration_steps = 200 ; #upper bound on iteration steps
@@ -132,22 +103,13 @@ def run():
 
    [h,d,a,mmatrix] = set_param_palma(param,ARGS.train_with_intronlengthinformation)
 
-   #import palma.model
-   #currentModel = palma.model.parse_file(modelFile)
-
-   #import palma_utils
-   #h,d,a,mmatrix = palma_utils.set_param(currentModel)
-
-   #############################################################################################
    # delete splicesite-score-information
-   #############################################################################################
-
-   #if ARGS.train_with_splicesitescoreinformation:
-   #   for i in range(len(Acceptors)):
-   #    Acceptors[i](Acceptors[{i]>-20)=1 ;
-   #    Donors{i}(Donors{i}>-20)=1 ;
-   #  end 
-   #end
+   if not ARGS.train_with_splicesitescoreinformation:
+      for i in range(len(Acceptors)):
+         if Acceptors[i] > -20:
+            Acceptors[i] = 1
+         if Donors[i] >-20:
+            Donors[i] = 1
 
    #############################################################################################
    # Training
@@ -241,18 +203,13 @@ def run():
          print_matrix = False
 
          currentAlignment = QPalmaDP.Alignment()
-         currentAlignment.penSetLength(30)
-         dA = QPalmaDP.createDoubleArrayFromList([1.0]*30)
-         #currentAlignment.penSetLimits(dA)
-         ps = QPalmaDP.penalty_struct()
-         ps.len = 30
-         ps.limit = dA
+         ps = h.convert2SWIG()
 
          currentAlignment.myalign( nr_paths, dna, dna_len,\
          est, est_len, ps, matchmatrix, mm_len, donor, d_len,\
          acceptor, a_len, remove_duplicate_scores, print_matrix)
 
-         pdb.set_trace()
+         print 'after myalign call...'
 
          # # Compute donor, acceptor with penalty_lookup_new
          # # returns two double lists
@@ -272,6 +229,8 @@ def run():
       iteration_nr += 1
       break
 
+   logfh.close()
+
    """
      for id = 1:N  
        %fprintf('%i\n', id) ;
index b38283f..1402e0d 100644 (file)
@@ -4,6 +4,7 @@
 import math
 import numpy.matlib
 import QPalmaDP
+import pdb
 
 def linspace(a,b,n):
    intervalLength = b-a
@@ -38,18 +39,18 @@ class Plf: #means piecewise linear function
       self.max_len = 0
       self.min_len = 0
 
-   #def convert2SWIG(self):
-   #   ps = QPalmaDP.penalty_struct()
-   #
-   #   #ps.len = self.len
-   #   #ps.limits = self.limits
-   #   ps.penalties = self.penalties
-   #   ps.max_len = self.max_len
-   #   ps.min_len = self.min_len
-   #   ps.transform = 0
-   #   ps.name = self.name
-
-   #   return ps
+   def convert2SWIG(self):
+      ps = QPalmaDP.penalty_struct()
+   
+      ps.limits = QPalmaDP.createDoubleArrayFromList(self.limits)
+      ps.penalties = QPalmaDP.createDoubleArrayFromList([elem[0] for elem in self.penalties.tolist()])
+
+      ps.max_len = self.max_len
+      ps.min_len = self.min_len
+      ps.transform = 0
+      ps.name = self.name
+
+      return ps
 
 
 def set_params_pa():