import bz2
def writeStruct(fid,plif):
- fid.write('%s_len_limits=%s\n'%(plif.name,str(plif.limits)))
- fid.write('%s_len_penalties=%s\n'%(plif.name,str(plif.penalties)))
- fid.write('%s_len_bins=%d\n'%(plif.name,len(plif.limits)))
+ fid.write('%s_limits=%s\n'%(plif.name,str(plif.limits)))
+ fid.write('%s_penalties=%s\n'%(plif.name,str(plif.penalties)))
+ fid.write('%s_bins=%d\n'%(plif.name,len(plif.limits)))
if plif.name == 'intron':
+ fid.write('%s_len_limits=%s\n'%(plif.name,str(plif.limits)))
+ fid.write('%s_len_penalties=%s\n'%(plif.name,str(plif.penalties)))
+ fid.write('%s_len_bins=%d\n'%(plif.name,len(plif.limits)))
fid.write('%s_len_min=%d\n'%(plif.name,plif.min_len))
fid.write('%s_len_max=%d\n'%(plif.name,plif.max_len))
fid.write('%s_len_transform=%s\n'%(plif.name,plif.transform))
--- /dev/null
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import cPickle
+
+def paths_load_data_pickle(expt,genome_info,PAR):
+ """
+ Load pickled alignment data and return (Sequences, Acceptors, Donors, Exons, Ests, Noises).
+ """
+
+ # Corresponds to the MATLAB-style signature:
+ # function [Sequences, Acceptors, Donors, Exons, Ests, Noises] = paths_load_data(expt,genome_info,PAR)
+ # Loads the relevant pickle file and returns the alignment data
+
+ # expt can be 'training','validation' or 'test'
+
+ assert expt in ['training','validation','test']
+
+ tmp_dir = '/fml/ag-raetsch/home/fabio/tmp'
+
+ Noises = [];
+
+ if expt == 'training':
+ if PAR.microexon:
+ if PAR.LOCAL_ALIGN: # local version
+
+ train_data = '%s/microexon_train_data.pickle' % genome_info.basedir
+ data = cPickle.load(open(train_data))
+
+ else: # global version
+ pass
+
+
+ else:
+ train_data = '%s/exons_train_local.pickle' % genome_info.basedir
+ data = cPickle.load(open(train_data))
+
+ print 'train_data is %s' % train_data
+
+ Sequences = data['Train'] # dna sequences
+ Acceptors = data['TrainAcc'] # acceptor scores
+ Donors = data['TrainDon'] # donor scores
+ Exons = data['TrainExon'] # exon boundaries
+ Ests = data['TrainEsts'] # est sequences
+
+ # Subtract one from every exon index to convert MATLAB's
+ # 1-based indices to Python's 0-based indices
+
+ Exons -= 1
+
+ return Sequences, Acceptors, Donors, Exons, Ests, Noises
and N == len(Ests), 'The Seq,Accept,Donor,.. arrays are of different lengths'
self.plog('Number of training examples: %d\n'% N)
- iteration_steps = 200 ; #upper bound on iteration steps
+ #iteration_steps = 200 ; #upper bound on iteration steps
+ iteration_steps = 2 ; #upper bound on iteration steps
remove_duplicate_scores = False
print_matrix = False
#############################################################################################
self.plog('Starting training...\n')
- iteration_nr = 1
+ iteration_nr = 0
while True:
if iteration_nr == iteration_steps:
for i in range(num_path[exampleIdx]):
AlignmentScores[i+1] = newAlignmentScores[i]
- #print AlignmentScores
-
spliceAlign = spliceAlign.reshape(num_path[exampleIdx],dna_len)
weightMatch = weightMatch.reshape(num_path[exampleIdx],mm_len)
# Calculate weights of the respective alignments Note that we are
#
# end of one example processing
#
- if exampleIdx == 10:
- break
+ #if exampleIdx == 100:
+ # break
- break
+ #break
#
# end of one iteration through all examples