112ec5f3746f54f15bfac89cf24a1653c19d0b66
[qpalma.git] / python / paths_load_data_pickle.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import cPickle
5
def paths_load_data_pickle(expt, genome_info, PAR):
    """Load the pickled alignment data for one experiment phase.

    The header comment of the original implementation shows the Matlab
    signature
    ``[Sequences, Acceptors, Donors, Exons, Ests, Noises] =
    paths_load_data(expt, genome_info, PAR)``, which this function mirrors.

    Only ``expt == 'training'`` is implemented.  The file that is read
    depends on ``PAR``:

    * ``PAR.microexon`` true and ``PAR.LOCAL_ALIGN`` true:
      ``<genome_info.basedir>/microexon_train_data.pickle``
    * ``PAR.microexon`` false:
      ``<genome_info.basedir>/exons_train_local.pickle``

    Parameters:
        expt: one of 'training', 'validation' or 'test'.
        genome_info: object whose ``basedir`` attribute names the directory
            holding the pickle files.
        PAR: object providing the ``microexon`` flag and, when that flag is
            set, the ``LOCAL_ALIGN`` flag.

    Returns:
        The tuple ``(Sequences, Acceptors, Donors, Exons, Ests, Noises)``:
        dna sequences, acceptor scores, donor scores, exon boundaries
        (lowered by one) and est sequences, taken from the pickled dict,
        followed by ``Noises``, which is always an empty list.

    Raises:
        AssertionError: if ``expt`` is not one of the three known phases.
        NotImplementedError: for 'validation' and 'test', and for the
            microexon global-alignment variant.  These paths never had a
            loader and previously failed with an UnboundLocalError.
    """
    # expt can be 'training', 'validation' or 'test'
    assert expt in ['training', 'validation', 'test']

    if expt != 'training':
        raise NotImplementedError(
            "no data loader for expt %r; only 'training' is implemented"
            % (expt,))

    if PAR.microexon:
        if not PAR.LOCAL_ALIGN:
            # global version: there has never been a data file for it
            raise NotImplementedError(
                'microexon training data is only available for the local '
                'alignment version (PAR.LOCAL_ALIGN)')
        # local version
        train_data = '%s/microexon_train_data.pickle' % genome_info.basedir
    else:
        train_data = '%s/exons_train_local.pickle' % genome_info.basedir

    # NOTE(review): unpickling can execute arbitrary code, so basedir must
    # only ever point at trusted files.
    # Pickles are read in binary mode, and try/finally (rather than `with`)
    # closes the handle on every path without needing newer syntax.
    handle = open(train_data, 'rb')
    try:
        data = cPickle.load(handle)
    finally:
        handle.close()

    # A parenthesised single argument prints the same text under the
    # Python 2 print statement as the original statement did.
    print('train_data is %s' % train_data)

    Sequences = data['Train']      # dna sequences
    Acceptors = data['TrainAcc']   # acceptor scores
    Donors = data['TrainDon']      # donor scores
    Exons = data['TrainExon']      # exon boundaries
    Ests = data['TrainEsts']       # est sequences
    Noises = []

    # Lower all indices by one to convert matlab to python indices.
    # Requires an object that supports in-place subtraction -- presumably a
    # numpy array; TODO confirm against the code that writes the pickle.
    Exons -= 1

    return Sequences, Acceptors, Donors, Exons, Ests, Noises