+ made original reads file location a parameter for the command line
[qpalma.git] / scripts / check_and_init.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import os.path
5 import cPickle
6
7 import qpalma.Configuration as Conf
8
9
def check_vmatch_params(Config):
    """
    Copy the VMatch parameters of both VMatch steps from the Configuration
    module (Conf) into Config and sanity-check them.

    Config is modified in place. It has to contain the key 'read_size'
    already, because that value is the upper bound of several checks.

    Raises an AssertionError if a parameter lies outside of its allowed
    interval or if a suffix tree path does not exist. Note that assert
    statements are skipped when Python runs with -O.
    """

    #
    # Check and set parameters for the first VMatch step
    #

    for name in ('mismatches_1', 'end_gap_1', 'read_length_1',
                 'repeat_mapping_1', 'seedlength_1', 'suffixtree_1'):
        Config[name] = getattr(Conf, name)

    read_size = Config['read_size']

    # NOTE(review): read_length_1 is stored but never validated.
    assert 0 <= Config['mismatches_1'] <= read_size, \
        'Error mismatches_1 has to be in the interval [0, read_size].'
    assert 0 <= Config['end_gap_1'] <= 5, \
        'Error end_gap_1 has to be in the interval [0, 5].'
    assert 0 <= Config['repeat_mapping_1'] <= 10, \
        'Error repeat_mapping_1 has to be in the interval [0, 10].'
    assert 2 <= Config['seedlength_1'] <= read_size, \
        'Error seedlength_1 has to be in the interval [2, read_size].'
    assert os.path.exists(Config['suffixtree_1']), \
        'Error suffixtree_1 has to point to an existing path.'

    #
    # Check and set parameters for the second VMatch step
    #

    for name in ('mismatches_2', 'sub_mismatches_2', 'min_short_end_2',
                 'repeat_mapping_2', 'seedlength_2'):
        Config[name] = getattr(Conf, name)

    # The existence check below needs Config['suffixtree_2'], which was never
    # stored before and therefore failed with a KeyError. A value that is
    # already present in Config is kept.
    # Assumes Conf defines suffixtree_2 analogous to suffixtree_1 -- TODO confirm.
    if 'suffixtree_2' not in Config:
        Config['suffixtree_2'] = Conf.suffixtree_2

    # NOTE(review): seedlength_2 is stored but never validated, and
    # repeat_mapping_2 is bounded by read_size whereas repeat_mapping_1 is
    # bounded by 10 -- confirm that both are intended.
    assert 0 <= Config['mismatches_2'] <= read_size, \
        'Error mismatches_2 has to be in the interval [0, read_size].'
    assert 0 <= Config['sub_mismatches_2'] <= read_size, \
        'Error sub_mismatches_2 has to be in the interval [0, read_size].'
    assert 0 <= Config['min_short_end_2'] <= read_size, \
        'Error min_short_end_2 has to be in the interval [0, read_size].'
    assert 0 <= Config['repeat_mapping_2'] <= read_size, \
        'Error repeat_mapping_2 has to be in the interval [0, read_size].'
    assert os.path.exists(Config['suffixtree_2']), \
        'Error suffixtree_2 has to point to an existing path.'
43
44
def check_and_init():
    """
    The purpose of this script is to take all the global variables from the
    Configuration file and store them into a dictionary for the pipeline.
    The dictionary is pickled to the file given by Conf.conf_object_path.

    Additionally sanity checks are performed for the parameters to be sure they
    are within a certain interval or the file they point to exists etc.

    Raises an AssertionError if one of the sanity checks fails. Note that
    assert statements are skipped when Python runs with -O.
    """

    jp = os.path.join

    # create a python dictionary to store all configuration parameters
    Config = {}

    result_dir = Conf.result_dir
    assert os.path.exists(result_dir), 'Error you have to specify an existing result_dir.'

    # assign main result dir in Config dictionary
    Config['result_dir'] = result_dir

    # the existence check for reads_location is currently disabled
    Config['reads_location'] = Conf.reads_location
    #assert os.path.exists(Config['reads_location'])

    Config['read_size'] = Conf.read_size
    assert 2 <= Config['read_size'] <= 100, \
        'Error read_size has to be in the interval [2, 100].'

    # Check VMatch related parameters
    check_vmatch_params(Config)

    subdirs = []

    # register the subdirs needed for mapping / alignment / remapping
    mapping_dir = jp(result_dir, 'mapping')
    Config['mapping_dir'] = mapping_dir
    Config['mapping_main_dir'] = jp(mapping_dir, 'main')
    Config['mapping_spliced_dir'] = jp(mapping_dir, 'spliced')

    subdirs.extend([Config['mapping_dir'], Config['mapping_main_dir'],
                    Config['mapping_spliced_dir']])

    Config['alignment_dir'] = jp(result_dir, 'alignment')

    subdirs.extend([Config['alignment_dir']])

    Config['remapping_dir'] = jp(result_dir, 'remapping')

    subdirs.extend([Config['remapping_dir']])

    # NOTE: the creation of the directories is disabled, so the paths collected
    # in subdirs are only recorded in Config and not created here.
    #try:
    #   for current_dir in subdirs:
    #      os.mkdir(current_dir)
    #except:
    #   print 'Error during initialization of project directories'

    # store the configuration in a pickled python dictionary; the file is
    # closed explicitly so the pickle is flushed to disk even if dump fails
    # half way or the interpreter does not finalize the file object right away
    conf_file = open(Conf.conf_object_path, 'w+')
    try:
        cPickle.dump(Config, conf_file)
    finally:
        conf_file.close()
101
102
# Run the configuration check and initialization when executed as a script.
if __name__ == '__main__':
    check_and_init()