[qpalma.git] / scripts / grid_heuristic.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
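"""
Grid submission script for the QPalma PipelineHeuristic.

Splits a vmatch map file (map.vm) into a number of parts and submits one
filtering job per part to the compute cluster via pythongrid. Each job runs
g_heuristic(), which applies PipelineHeuristic.filter() to its part.
"""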

import cPickle
import sys
import pdb
import time
import os
import os.path
import math

from pythongrid import KybJob, Usage
from pythongrid import process_jobs, submit_jobs, collect_jobs, get_status

from PipelineHeuristic import *

# The script imports itself, presumably so that the job function can be handed
# to pythongrid as grid_heuristic.g_heuristic, i.e. resolvable by module name
# on the cluster node.
import grid_heuristic

from Utils import split_file


def g_heuristic(run_fname, data_fname, param_fname, result_fname):
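    """
    Run the PipelineHeuristic filter on a single map split.

    Executed on a cluster node via pythongrid; run_fname, param_fname and
    result_fname are passed straight through to PipelineHeuristic.
    """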
    #print run_fname, data_fname, param_fname, result_fname
    ph1 = PipelineHeuristic(run_fname, data_fname, param_fname, result_fname)
    ph1.filter()

    return 'finished filtering set %s.' % data_fname


def create_and_submit():
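    """
    Split the original vmatch map file into num_splits parts and submit one
    g_heuristic job per part to the grid via pythongrid.
    """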
    jp = os.path.join

    num_splits = 25

    run_dir = '/fml/ag-raetsch/home/fabio/tmp/newest_run/alignment/run_enable_quality_scores_+_enable_splice_signals_+_enable_intron_length_+'
    #data_dir = '/fml/ag-raetsch/home/fabio/tmp/lyrata_analysis/'

    data_dir = '/fml/ag-raetsch/home/fabio/tmp/vmatch_evaluation/main'

    run_fname = jp(run_dir, 'run_obj.pickle')

    #original_map_fname = '/fml/ag-raetsch/home/fabio/tmp/lyrata_analysis/map.vm'
    #original_map_fname = '/fml/ag-raetsch/home/fabio/tmp/transcriptome_data/new_map.vm'
    original_map_fname = '/fml/ag-raetsch/home/fabio/tmp/vmatch_evaluation/main/map.vm'
    split_file(original_map_fname, data_dir, num_splits)

    param_fname = jp(run_dir, 'param_526.pickle')

    functionJobs = []

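    # One KybJob per split: each job runs g_heuristic on map.part_<idx> and
    # writes its output to map.vm.part_<idx>.heuristic; h_vmem requests 25 GB
    # of memory per job from the grid engine.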
    for idx in range(num_splits):
        data_fname = jp(data_dir, 'map.part_%d' % idx)
        result_fname = jp(data_dir, 'map.vm.part_%d.heuristic' % idx)

        #pdb.set_trace()

        current_job = KybJob(grid_heuristic.g_heuristic, [run_fname, data_fname, param_fname, result_fname])
        current_job.h_vmem = '25.0G'
        #current_job.express = 'True'

        print "job #%d: " % idx, current_job.nativeSpecification

        functionJobs.append(current_job)
        #break

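    # submit_jobs() hands all jobs to the cluster and returns a session id plus
    # the job ids; the commented-out block below would instead poll get_status()
    # and fetch the finished jobs with collect_jobs().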
    (sid, jobids) = submit_jobs(functionJobs)
    #print 'checking whether finished'
    #while not get_status(sid, jobids):
    #    time.sleep(7)
    #print 'collecting jobs'
    #retjobs = collect_jobs(sid, jobids, functionJobs)
    #print "ret fields AFTER execution on cluster"
    #for (i, job) in enumerate(retjobs):
    #    print "Job #", i, "- ret: ", job.ret

    #print '--------------'


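# A minimal sketch (not part of the original pipeline) of how the per-split
# '.heuristic' results could be merged back into a single file once all jobs
# have finished. The commented-out split_file_join_results call in the
# __main__ block below presumably serves a similar purpose; since its signature
# is not shown here, this hypothetical helper simply concatenates the result
# parts written by create_and_submit().
def join_heuristic_results(data_dir, num_splits, joined_fname):
    jp = os.path.join
    out = open(jp(data_dir, joined_fname), 'w')
    for idx in range(num_splits):
        part = open(jp(data_dir, 'map.vm.part_%d.heuristic' % idx))
        out.write(part.read())
        part.close()
    out.close()

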
if __name__ == '__main__':
    #split_file_join_results('/fml/ag-raetsch/home/fabio/tmp/transcriptome_data/map.vm',10)
    create_and_submit()