First fully working version.
author     fabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
           Wed, 14 May 2008 15:26:28 +0000 (15:26 +0000)
committer  fabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
           Wed, 14 May 2008 15:26:28 +0000 (15:26 +0000)
git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@9010 e1793c9e-67f9-0310-80fc-b846ff1f7b36

scripts/grid_predict.py

index 7dffb16..50657c9 100644
@@ -8,10 +8,12 @@ import os
 import os.path
 import math
 
-import pythongrid
+from pythongrid import Job, KybJob, MethodJob, processJobs, Usage, processJobsLocally
 
 from qpalma_main import *
 
+import grid_predict
+
 
 def get_slices(dataset_size,num_nodes):
    all_instances = []
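
Note: the diff context cuts off the body of get_slices. From its call site below
(get_slices(len(prediction_keys),num_splits)) and the way each slice is consumed
(prediction_keys[slice[0]:slice[1]]), a minimal sketch of an even split, with the
remainder folded into the last slice, could look like the following; the original
balancing may differ:

   def get_slices(dataset_size,num_nodes):
      # split range(dataset_size) into num_nodes contiguous (start,stop) pairs
      all_instances = []
      part = dataset_size / num_nodes        # integer division under Python 2
      begin,end = 0,0
      for idx in range(1,num_nodes+1):
         begin,end = end,part*idx
         all_instances.append((begin,end))
      # hand any remainder to the last slice so no key is dropped
      if end != dataset_size:
         all_instances[-1] = (begin,dataset_size)
      return all_instances
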
@@ -41,16 +43,16 @@ def makeJobs(run,dataset_fn,chunks,param):
 
    jobs=[]
 
-   for current_chunk in chunks:
-      current_job = KybJob(predict,[run,prediction_set,param])
+   for c_name,current_chunk in chunks:
+      current_job = KybJob(grid_predict.g_predict,[run,dataset_fn,current_chunk,param,c_name])
       current_job.h_vmem = '5.0G'
       current_job.express = 'True'
 
-      print "job #1: ", j1.nativeSpecification
+      print "job #1: ", current_job.nativeSpecification
 
-      jobs.append(j1)
+      jobs.append(current_job)
 
-  return jobs
+   return jobs
 
 
 def create_and_submit():
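
Note: the widened import line also pulls in processJobsLocally, which points at
running a local smoke test before jobs touch the grid engine. A minimal sketch,
assuming processJobsLocally accepts the same job list as processJobs (the shared
import suggests a mirrored interface); test_locally is a hypothetical helper:

   def test_locally(run,dataset_fn,chunks,param):
      # run the same jobs in-process instead of via grid submission
      jobs = makeJobs(run,dataset_fn,chunks,param)
      processed = processJobsLocally(jobs)
      for idx,job in enumerate(processed):
         print "local job",idx,"ret:",job.ret
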
@@ -62,21 +64,33 @@ def create_and_submit():
 
    run_dir = '/fml/ag-raetsch/home/fabio/tmp/newest_run/alignment/run_enable_quality_scores_+_enable_splice_signals_+_enable_intron_length_+'
 
-   run   = cPickle.load(jp(run_dir,'run_obj.pickle'))
-   param = cPickle.load(jp(run_dir,'param_526.pickle'))
+   run   = cPickle.load(open(jp(run_dir,'run_obj.pickle')))
+   param = cPickle.load(open(jp(run_dir,'param_526.pickle')))
 
-   dataset_fn        = ''
-   prediction_keys   = ''
+   dataset_fn           = '/fml/ag-raetsch/home/fabio/svn/projects/QPalma/scripts/dataset_12_05_08.test.pickle'
+   prediction_keys_fn   = '/fml/ag-raetsch/home/fabio/svn/projects/QPalma/scripts/dataset_12_05_08.test_keys.pickle'
 
-   num_splits = 10
-   slices = get_slices(prediction_keys,num_splits)
-   chunks = []
-   for slice in slices:
-      chunks.append(prediction_keys[slice[0]:slice[1]])
+   prediction_keys = cPickle.load(open(prediction_keys_fn))
 
+   print 'Found %d keys for prediction.' % len(prediction_keys)
+
+   num_splits = 12
+   slices = get_slices(len(prediction_keys),num_splits)
+   chunks = []
+   for idx,slice in enumerate(slices):
+      c_name = 'chunk_%d' % idx
+      chunks.append((c_name,prediction_keys[slice[0]:slice[1]]))
 
    functionJobs = makeJobs(run,dataset_fn,chunks,param)
 
+   total = 0
+   for size in [len(elem) for name,elem in chunks]:
+      total += size
+
+   assert total == len(prediction_keys)
+
+   print 'Got %d job(s)' % len(functionJobs)
+
    print "output ret field in each job before sending it onto the cluster"
    for (i, job) in enumerate(functionJobs):
       print "Job with id: ", i, "- ret: ", job.ret
@@ -85,21 +99,23 @@ def create_and_submit():
    print "sending function jobs to cluster"
    print ""
 
-   #processedFunctionJobs = processJobs(functionJobs)
+   processedFunctionJobs = processJobs(functionJobs)
 
    print "ret fields AFTER execution on cluster"
    for (i, job) in enumerate(processedFunctionJobs):
       print "Job with id: ", i, "- ret: ", job.ret
 
 
-def predict(run,prediction_set,param):
+def g_predict(run,dataset_fn,prediction_keys,param,set_name):
    """
-   
+   Run the QPalma prediction for one chunk of the dataset; executed on a cluster node.
    """
 
    qp = QPalma()
-   qp.predict(run,dataset_fn,prediction_keys,param):
+   qp.predict(run,dataset_fn,prediction_keys,param,set_name)
+
+   return 'finished prediction of set %s.' % set_name
 
 
 if __name__ == '__main__':
-   create_and_submit():
+   create_and_submit()
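
Note: g_predict is the function that actually runs on each node; pythongrid
pickles the callable grid_predict.g_predict together with its argument list and
replays it remotely, which is presumably why the module imports itself at the
top. A minimal sketch of driving a single chunk by hand, with a hypothetical key
range and the run/param/dataset_fn objects loaded as in create_and_submit:

   import cPickle
   import grid_predict

   keys = cPickle.load(open(prediction_keys_fn))
   msg  = grid_predict.g_predict(run,dataset_fn,keys[0:100],param,'chunk_0')
   print msg    # 'finished prediction of set chunk_0.'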