+ renamed dyn_prog directory
[qpalma.git] / scripts / QPalmaPipeline.py
1 #!/usr/bin/env python
2 # -*- coding:latin-1 -*-
3
4 # first step
5
6 import qpalma.Configuration as Conf
7
8 from pipeline.Pipeline import *
9
10 import os.path
11 import cPickle
12
13 from PipelineHeuristic import *
14
15 import glob
16 import time
17
18
19 def get_most_recent_param(current_dir):
20 date_file_list = []
21 for file in glob.glob(current_dir + '/param*'):
22 stats = os.stat(file)
23 lastmod_date = time.localtime(stats[8])
24 date_file_tuple = lastmod_date, file
25 date_file_list.append(date_file_tuple)
26
27 date_file_list.sort()
28 date_file_list.reverse()
29
30 return date_file_list[0][1]
31
32
33 class InitStep(PipelineStep):
34
35 def __init__(self):
36
37 result_dir = Conf.result_dir
38
39
40 class FirstStep(PipelineStep):
41 """
42
43 """
44
45 def __init__(self):
46 jp = os.path.join
47
48 # a list storing all commands for this particular step
49 cL = []
50
51
52 # fetch the config object which stores all relevant parameters
53 Config = cPickle.load(open(Conf.conf_object_path))
54
55 reads_0_1st = jp(Config['mapping_main_dir'],'reads_0.fl')
56 cmd = 'fix_indels.pl %s > %s' % (Config['reads_location'],reads_0_1st)
57 cL.append(cmd)
58
59 # ATTENTION: Changing working directory
60 os.chdir(Config['mapping_main_dir'])
61
62 cmd = 'map_full_transcript.pl --mismatches=%d --end_gap=%d -no_edit_distance --read_length=%d --repeat_mapping=%d --seedlength=%d --dir=. --suffixtree=%s' % (\
63 Config['mismatches_1'],\
64 Config['end_gap_1'],\
65 Config['read_size'],\
66 Config['repeat_mapping_1'],\
67 Config['seedlength_1'],\
68 Config['suffixtree_1'])
69
70 cL.append(cmd)
71
72 reads_0_2nd = jp(Config['mapping_spliced_dir'],'reads_0.fl')
73 cmd = 'add_quality_2_fl.pl reads_0.fl reads_3.fl > %s' % reads_0_2nd
74
75 cL.append(cmd)
76
77 # ATTENTION: Changing working directory
78 os.chdir(Config['mapping_spliced_dir'])
79
80 # ATTENTION: Whether we should use the filtering step is not clear
81 #cmd = 'filter_quality.pl 8 reads_0.fl && mv reads_0.fl.filtered reads_0.fl'
82 #cL.append(cmd)
83
84 cmd = 'map_spliced_transcript.pl --mismatches=%d --sub_mismatches=%d --read_length=%d --min_short_end=%d --repeat_mapping=%d --seedlength=%d --dir=. --suffixtree=%s' % (\
85 Config['mismatches_2'],\
86 Config['sub_mismatches_2'],\
87 Config['read_size'],\
88 Config['min_short_end_2'],\
89 Config['repeat_mapping_2'],\
90 Config['seedlength_2'],\
91 Config['suffixtree_2'])
92
93 cL.append(cmd)
94
95 for elem in cL:
96 print elem
97
98 PipelineStep.__init__(self,cL)
99
100
101 class SecondStep(PipelineStep):
102 """
103 We can train
104 """
105
106 pass
107
108
109 class ThirdStep(PipelineStep):
110 """
111 We use the QPalma heuristic to predict those reads that should be aligned by
112 the QPalma alignment program even though vmatch found a full match with some
113 mismatches.
114 """
115 jp = os.path.join
116
117 # fetch the config object which stores all relevant parameters
118 Config = cPickle.load(open(Conf.conf_object_path))
119
120 data_fn = jp(Config['heuristic_dir'],'map.vm')
121 param_dir = '/fml/ag-raetsch/home/fabio/tmp/newest_run/alignment/run_enable_quality_scores_+_enable_splice_signals_+_enable_intron_length_+'
122 param_fn = get_most_recent_param(param_dir)
123 run_fn = jp(param_dir,'run_obj.pickle')
124
125 result_spliced_fn = jp(Config['heuristic_dir'],'spliced.heuristic')
126 result_unspliced_fn = jp(Config['heuristic_dir'],'unspliced.heuristic')
127
128 ph1 = PipelineHeuristic(run_fn,data_fn,param_fn,result_spliced_fn,result_unspliced_fn)
129 ph1.filter()
130
131
132 class FourthStep(PipelineStep):
133 """
134
135 """
136
137
138 if __name__ == '__main__':
139 step1 = FirstStep()