import os.path
import math
-import pythongrid
+from pythongrid import Job, KybJob, MethodJob, processJobs, Usage, processJobsLocally
from qpalma_main import *
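+# NOTE: this module imports itself so that the job function can be handed to
+# pythongrid as grid_predict.g_predict, i.e. as a module-level function the
+# cluster nodes can re-import by name when they unpickle the job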
+import grid_predict
+import cPickle
+
+# shorthand for building paths to the pickled run/parameter files below
+jp = os.path.join
+
def get_slices(dataset_size,num_nodes):
all_instances = []
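    # The body below is a reconstruction sketch (the original was not part of
    # this excerpt): split range(dataset_size) into num_nodes contiguous
    # (begin,end) slices, with the last slice absorbing the remainder.
    part = dataset_size / num_nodes
    for idx in range(num_nodes):
        end = dataset_size if idx == num_nodes-1 else (idx+1)*part
        all_instances.append((idx*part,end))
    return all_instances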

def makeJobs(run,dataset_fn,chunks,param):
    jobs=[]
- for current_chunk in chunks:
- current_job = KybJob(predict,[run,prediction_set,param])
+    for c_name,current_chunk in chunks:
+ current_job = KybJob(grid_predict.g_predict,[run,dataset_fn,current_chunk,param,c_name])
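+        # per-job resource requests picked up by pythongrid when it builds the
+        # native specification: h_vmem asks SGE for 5 GB of memory and
+        # express = 'True' requests the short-running express queue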
current_job.h_vmem = '5.0G'
current_job.express = 'True'
- print "job #1: ", j1.nativeSpecification
+ print "job #1: ", current_job.nativeSpecification
- jobs.append(j1)
+ jobs.append(current_job)
    return jobs
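+
+# Load the pickled run description and trained parameters, split the
+# prediction keys into equally sized chunks and submit one prediction
+# job per chunk to the cluster.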
def create_and_submit():
run_dir = '/fml/ag-raetsch/home/fabio/tmp/newest_run/alignment/run_enable_quality_scores_+_enable_splice_signals_+_enable_intron_length_+'
- run = cPickle.load(jp(run_dir,'run_obj.pickle'))
- param = cPickle.load(jp(run_dir,'param_526.pickle'))
+ run = cPickle.load(open(jp(run_dir,'run_obj.pickle')))
+ param = cPickle.load(open(jp(run_dir,'param_526.pickle')))
- dataset_fn = ''
- prediction_keys = ''
+ dataset_fn = '/fml/ag-raetsch/home/fabio/svn/projects/QPalma/scripts/dataset_12_05_08.test.pickle'
+ prediction_keys_fn = '/fml/ag-raetsch/home/fabio/svn/projects/QPalma/scripts/dataset_12_05_08.test_keys.pickle'
- num_splits = 10
- slices = get_slices(prediction_keys,num_splits)
- chunks = []
- for slice in slices:
- chunks.append(prediction_keys[slice[0]:slice[1]])
+ prediction_keys = cPickle.load(open(prediction_keys_fn))
+ print 'Found %d keys for prediction.' % len(prediction_keys)
+
+ num_splits = 12
+ slices = get_slices(len(prediction_keys),num_splits)
+ chunks = []
+ for idx,slice in enumerate(slices):
+ c_name = 'chunk_%d' % idx
+ chunks.append((c_name,prediction_keys[slice[0]:slice[1]]))
functionJobs = makeJobs(run,dataset_fn,chunks,param)
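+
+    # sanity check: the chunks together must cover every prediction key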
+    total = sum([len(elem) for name,elem in chunks])
+    assert total == len(prediction_keys)
+
+ print 'Got %d job(s)' % len(functionJobs)
+
print "output ret field in each job before sending it onto the cluster"
for (i, job) in enumerate(functionJobs):
print "Job with id: ", i, "- ret: ", job.ret
print "sending function jobs to cluster"
print ""
- #processedFunctionJobs = processJobs(functionJobs)
+ processedFunctionJobs = processJobs(functionJobs)
print "ret fields AFTER execution on cluster"
for (i, job) in enumerate(processedFunctionJobs):
print "Job with id: ", i, "- ret: ", job.ret
-def predict(run,prediction_set,param):
+def g_predict(run,dataset_fn,prediction_keys,param,set_name):
"""
+    Run the QPalma prediction for one chunk of the prediction keys on a
+    cluster node and return a short status message for that set.
"""
qp = QPalma()
- qp.predict(run,dataset_fn,prediction_keys,param):
+ qp.predict(run,dataset_fn,prediction_keys,param,set_name)
+
+ return 'finished prediction of set %s.' % set_name
if __name__ == '__main__':
- create_and_submit():
+ create_and_submit()