ffa07abe5031d09c418474f25bbb6d85acb308f5
[qpalma.git] / tools / run_specific_scripts / transcriptome_analysis / createNewDataset.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import os.path
5 import sys
6 import cProfile
7
8 from compile_dataset import DatasetGenerator
9
10 jp = os.path.join
11
def combine_spliced_map_parts(data_dir, result_fn):
    """
    Concatenate every '*.spliced' chunk file of data_dir into a single file.

    The combined file is written to data_dir/result_fn. An existing file of
    that name is removed first. Lines that start with '#' are dropped from
    the combined output; all other bytes are copied unchanged.

    Arguments:
        data_dir  -- directory holding the '*.spliced' chunk files
        result_fn -- name of the combined file, relative to data_dir

    Raises:
        AssertionError -- if data_dir does not exist
    """

    # Raised explicitly rather than through an assert statement so the check
    # still runs under "python -O"; the exception type is unchanged.
    if not os.path.exists(data_dir):
        raise AssertionError('data directory does not exist: %s' % data_dir)

    result_path = os.path.join(data_dir, result_fn)

    if os.path.exists(result_path):
        os.remove(result_path)

    # The chunk list is taken before the result file is created, so a result
    # name that itself ends in '.spliced' is never read as its own input.
    # Sorting makes the concatenation order reproducible (os.listdir returns
    # entries in arbitrary order).
    chunk_paths = []
    for chunk_fn in sorted(os.listdir(data_dir)):
        full_chunk_fn = os.path.join(data_dir, chunk_fn)
        if chunk_fn.endswith('.spliced') and os.path.isfile(full_chunk_fn):
            chunk_paths.append(full_chunk_fn)

    # The filter is applied to the concatenated stream, not per file: if a
    # chunk does not end with a newline, the first line of the next chunk
    # continues the same logical line and shares its keep/drop decision.
    at_line_start = True
    dropping = False

    with open(result_path, 'wb') as result_fh:
        for full_chunk_fn in chunk_paths:
            with open(full_chunk_fn, 'rb') as chunk_fh:
                for line in chunk_fh:
                    if at_line_start:
                        dropping = line.startswith(b'#')
                    if not dropping:
                        result_fh.write(line)
                    at_line_start = line.endswith(b'\n')
31
32
def run(main_dir='/fml/ag-raetsch/share/projects/qpalma/thaliana_4_lanes/lane_4/main/',
        spliced_dir='/fml/ag-raetsch/share/projects/qpalma/thaliana_4_lanes/lane_4/spliced/',
        result_dir='/fml/ag-raetsch/share/projects/qpalma/thaliana_4_lanes/lane_4/spliced/dataset/'):
    """
    Combine the spliced map chunks and compile a dataset from both map files.

    Steps:
      1. merge the '*.spliced' chunk files of main_dir into
         main_dir/'map.vm.spliced',
      2. hand that file and spliced_dir/'map.vm' to DatasetGenerator,
      3. compile the dataset and save it as result_dir/'dataset_run_1'.

    Arguments:
        main_dir    -- directory holding the '*.spliced' chunk files
        spliced_dir -- directory holding the 'map.vm' file
        result_dir  -- directory the compiled dataset is saved into

    The defaults are the lane_4 directories this script was configured
    with, so calling run() without arguments behaves as before.
    """

    # Other directory sets can be passed as arguments instead of editing
    # this file, e.g. the vmatch evaluation layout:
    #   main_dir    = '/fml/ag-raetsch/home/fabio/tmp/vmatch_evaluation/main'
    #   spliced_dir = '/fml/ag-raetsch/home/fabio/tmp/vmatch_evaluation/spliced_3'
    #   result_dir  = '/fml/ag-raetsch/home/fabio/tmp/vmatch_evaluation/spliced_3/dataset'

    # Single definition of the combined file name, used both for writing the
    # merged file and for reading it back below.
    combined_map_name = 'map.vm.spliced'

    combine_spliced_map_parts(main_dir, combined_map_name)

    map_1_fn = jp(main_dir, combined_map_name)
    map_2_fn = jp(spliced_dir, 'map.vm')

    dg = DatasetGenerator(map_1_fn, map_2_fn)
    dg.compile_dataset()
    dg.saveAs(jp(result_dir, 'dataset_run_1'))
51
if __name__ == '__main__':
    # Script entry point. To profile the run, replace the call below with
    # cProfile.run('run()').
    run()