2 # -*- coding: utf-8 -*-
def paths_load_data(expt, genome_info, PAR):
    """Load the alignment data set for the requested experiment.

    Python port of the MATLAB function
    ``[Sequences, Acceptors, Donors, Exons, Ests, Noises] =
    paths_load_data(expt, genome_info, PAR)``.

    Args:
        expt: One of 'training', 'validation' or 'test'. Only 'training'
            is implemented in this port.
        genome_info: Object whose ``basedir`` attribute is the directory
            that holds the ``.mat`` data files.
        PAR: Parameter object. ``PAR.LOCAL_ALIGN`` selects the local
            (true) or the global (false) version of the data.

    Returns:
        A tuple ``(Sequences, Acceptors, Donors, Exons, Ests, Noises)``
        taken from the loaded ``.mat`` file: dna sequences, acceptor
        scores, donor scores, exon boundaries, est sequences and the
        substitution matrix. ``Noises`` is an empty list when no
        substitution matrix was loaded (local version).

    Raises:
        AssertionError: If ``expt`` is not one of the three known names.
        NotImplementedError: If ``expt`` is 'validation' or 'test'.
    """
    assert expt in ['training', 'validation', 'test'], \
        "expt must be 'training', 'validation' or 'test'"

    if expt != 'training':
        # The validation and test loaders of the MATLAB original (variables
        # Val*/Test* read from the microexon_val_data_cut*, exons_val_*,
        # microexon_test_data_cut* and exons_test_* files) were never ported.
        # Previously these experiments fell through to the return statement
        # and failed there on unbound local variables.
        raise NotImplementedError(
            "paths_load_data: loading %r data is not implemented" % expt)

    # Only the global version provides a substitution matrix. Give Noises a
    # defined value so that the local version can return as well.
    Noises = []

    # Both versions read the uncut microexon file. The parameter-specific
    # "cut" file names (microexon_train_data_cut_local.mat and
    # microexon_train_data_cut_ip=..._dp=..._mp=....mat) were overwritten by
    # this name before ever being used, so they are no longer built.
    train_data = '%s/microexon_train_data.mat' % genome_info.basedir
    if PAR.LOCAL_ALIGN:  # local version
        data = scipy.io.loadmat(train_data)
    else:  # global version
        data = scipy.io.loadmat(train_data)
        Noises = data['TrainNoise']  # substitution matrix

    # NOTE(review): the indentation of this file was lost, so the nesting of
    # the following load had to be reconstructed. As written it always
    # replaces the data loaded above (only Noises survives). Confirm whether
    # it was meant to be guarded by a condition that is no longer visible.
    train_data = '%s/exons_train_local.mat' % genome_info.basedir
    data = scipy.io.loadmat(train_data)

    # Single-argument call form: prints the same text on Python 2 and 3.
    print('train_data is %s' % train_data)

    Sequences = data['Train']     # dna sequences
    Acceptors = data['TrainAcc']  # acceptor scores
    Donors = data['TrainDon']     # donor scores
    Exons = data['TrainExon']     # exon boundaries
    Ests = data['TrainEsts']      # est sequences

    return Sequences, Acceptors, Donors, Exons, Ests, Noises