--- /dev/null
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import math
+
+def penalty_lookup_new(penalty_struct, value):
+
+ limits = penalty_struct.limits
+ penalties = penalty_struct.penalties
+
+ if penalty_struct.transform == 'log':
+ value = math.log(value)
+
+ elif penalty_struct.transform == 'log(+3)':
+ value = math.log(value+3)
+
+ elif penalty_struct.transform == 'log(+1)':
+ value = math.log(value+1)
+
+ elif penalty_struct.transform == '' or\
+ penalty_struct.transform == '(+3)':
+ value += 3
+
+ elif penalty_struct.transform == 'mod3':
+ value = value%3
+
+ else:
+ assert False,'unknown penalty transform'
+
+ count = len([elem for elem in limits if elem <= value])
+
+ if count == 0:
+ pen = penalties[0]
+ elif count == length(limits):
+ pen=penalties[-1]
+ else:
+ pen = (penalties[count+1]*(value-limits[count]) + penalties[count]\
+ *(limits[count+1]-value)) / (limits[count+1]-limits[count])
from computeSpliceWeights import *
import QPalmaDP
+from penalty_lookup import *
+from compute_donacc import *
+
"""
A training method for the QPalma project
self.deletion_prob = self.prob/3 ;
self.mutation_prob = self.prob/3 ;
-
-#%basedir='/fml/ag-raetsch/share/projects/splicing/uta/'
-#if ~exist('basedir','var')
-# basedir='/fml/ag-raetsch/share/projects/palma/elegans/' ;
-#end
-#
-#%basedir='/fml/ag-raetsch/share/projects/palma/human/' ;
-
-
-class GenomeInfo:
- """
- GenomeInfo's init function replaces
- init_genome
-
- """
-
- def __init__(self,gen_file):
- """ load the info stored in genome.config """
-
- self.est_fnames = g_est_fnames
- self.cdna_fnames = g_cdna_fnames
- self.flcdna_fnames=g_flcdna_fnames
- self.contig_names=g_contig_names
- self.flat_fnames=g_flat_fnames
- self.fasta_fnames=g_fasta_fnames
- self.annotation_fnames=g_annotation_fnames
- self.alphabet=g_alphabet
- self.basedir=g_basedir
- self.max_intron_len = g_max_intron_len
- self.min_intron_len = g_min_intron_len
- self.merge_est_transcripts = g_merge_est_transcripts
-
-
def run():
ARGS = Param()
+ logfh = open('qpalma.log','w+')
+
gen_file= '%s/genome.config' % ARGS.basedir
cmd = ['']*4
ginfo = scipy.io.loadmat('genome_info.mat')
genome_info = ginfo['genome_info']
- print 'genome_info.basedir is %s\n'%genome_info.basedir
+ plog(logfh,'genome_info.basedir is %s\n'%genome_info.basedir)
Sequences, Acceptors, Donors, Exons, Ests, Noises = paths_load_data('training',genome_info,ARGS)
- #############################################################################################
- # Constants
- #############################################################################################
-
# number of training instances
N = len(Sequences)
- print 'Number of training examples: %d\n'% N
+ assert N == len(Acceptors) and N == len(Acceptors) and N == len(Exons)\
+ and N == len(Ests), 'The Seq,Accept,Donor,.. arrays are of different lengths'
+
+ plog(logfh,'Number of training examples: %d\n'% N)
random_N = 100 ; # number of constraints to generate per iteration
iteration_steps = 200 ; #upper bound on iteration steps
[h,d,a,mmatrix] = set_param_palma(param,ARGS.train_with_intronlengthinformation)
- #import palma.model
- #currentModel = palma.model.parse_file(modelFile)
-
- #import palma_utils
- #h,d,a,mmatrix = palma_utils.set_param(currentModel)
-
- #############################################################################################
# delete splicesite-score-information
- #############################################################################################
-
- #if ARGS.train_with_splicesitescoreinformation:
- # for i in range(len(Acceptors)):
- # Acceptors[i](Acceptors[{i]>-20)=1 ;
- # Donors{i}(Donors{i}>-20)=1 ;
- # end
- #end
+ if not ARGS.train_with_splicesitescoreinformation:
+ for i in range(len(Acceptors)):
+ if Acceptors[i] > -20:
+ Acceptors[i] = 1
+ if Donors[i] >-20:
+ Donors[i] = 1
#############################################################################################
# Training
print_matrix = False
currentAlignment = QPalmaDP.Alignment()
- currentAlignment.penSetLength(30)
- dA = QPalmaDP.createDoubleArrayFromList([1.0]*30)
- #currentAlignment.penSetLimits(dA)
- ps = QPalmaDP.penalty_struct()
- ps.len = 30
- ps.limit = dA
+ ps = h.convert2SWIG()
currentAlignment.myalign( nr_paths, dna, dna_len,\
est, est_len, ps, matchmatrix, mm_len, donor, d_len,\
acceptor, a_len, remove_duplicate_scores, print_matrix)
- pdb.set_trace()
+ print 'after myalign call...'
# # Compute donor, acceptor with penalty_lookup_new
# # returns two double lists
iteration_nr += 1
break
+ logfh.close()
+
"""
for id = 1:N
%fprintf('%i\n', id) ;