+ made original reads file location a parameter for the command line
[qpalma.git] / scripts / createNewDataset.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import qpalma.Configuration as Conf
5 from compile_dataset import DatasetGenerator
6
7 #filtered_fn = '/fml/ag-raetsch/share/projects/qpalma/solexa/paper_data/allReads.full.newid'
8 #remapped_fn = '/fml/ag-raetsch/share/projects/qpalma/solexa/paper_data/map_2nd.vm_chr'
9
# Default input files, used when no path is given on the command line.
# The module-level names are kept so existing references keep working.
filtered_fn = '/fml/ag-raetsch/share/projects/qpalma/solexa/paper_data/allReads.test'
remapped_fn = '/fml/ag-raetsch/share/projects/qpalma/solexa/paper_data/map.test'

# Earlier entry points, kept for reference:
#compile_d2(Conf.dna_flat_fn,filtered_fn,remapped_fn,'dataset_2nd')
#compile_dataset_direct(filtered_fn,'dataset_all_spliced_reads_2.pickle')


def resolve_input_paths(argv):
    """Return the ``(filtered, remapped)`` input paths selected by *argv*.

    *argv* is a ``sys.argv``-style sequence: ``argv[0]`` is the program name,
    an optional ``argv[1]`` replaces ``filtered_fn`` and an optional
    ``argv[2]`` replaces ``remapped_fn``.  Any path that is not supplied
    falls back to the corresponding module-level default.

    Raises ValueError (carrying a usage message) if more than two paths are
    given.
    """
    given = tuple(argv[1:])
    defaults = (filtered_fn, remapped_fn)
    if len(given) > len(defaults):
        raise ValueError(
            'usage: %s [filtered_reads_file [remapped_reads_file]]' % argv[0])
    # Positional overrides first, then the defaults for whatever is missing.
    return given + defaults[len(given):]


def main(argv=None):
    """Run the dataset generation for the selected input files.

    Creates a DatasetGenerator from the two input paths, calls
    compile_training_set() and compile_testing_set() on it, and finally
    saveAs('dataset_11_05').  *argv* defaults to ``sys.argv``; a malformed
    command line terminates the process with the usage message.
    """
    # Function-scope import: sys is only needed by the command-line entry.
    import sys

    if argv is None:
        argv = sys.argv
    try:
        reads_path, mapping_path = resolve_input_paths(argv)
    except ValueError as err:
        # Only argument errors are turned into a clean exit; failures inside
        # the generator itself still surface with a full traceback.
        sys.exit(str(err))

    dg = DatasetGenerator(reads_path, mapping_path)
    dg.compile_training_set()
    dg.compile_testing_set()
    dg.saveAs('dataset_11_05')


# Only build the dataset when executed as a script, not when imported.
if __name__ == '__main__':
    main()