+ added two scripts for the psl2gff step
[qpalma.git] / scripts / grid_predict.py
index 7dffb16..3d962f7 100644 (file)
@@ -8,10 +8,12 @@ import os
 import os.path
 import math
 
 import os.path
 import math
 
-import pythongrid
+from pythongrid import Job, KybJob, MethodJob, processJobs, Usage, processJobsLocally
 
 from qpalma_main import *
 
 
 from qpalma_main import *
 
+import grid_predict
+
 
 def get_slices(dataset_size,num_nodes):
    all_instances = []
 
 def get_slices(dataset_size,num_nodes):
    all_instances = []
@@ -41,16 +43,16 @@ def makeJobs(run,dataset_fn,chunks,param):
 
    jobs=[]
 
 
    jobs=[]
 
-   for current_chunk in chunks:
-      current_job = KybJob(predict,[run,prediction_set,param])
-      current_job.h_vmem = '5.0G'
-      current_job.express = 'True'
+   for c_name,current_chunk in chunks:
+      current_job = KybJob(grid_predict.g_predict,[run,dataset_fn,current_chunk,param,c_name])
+      current_job.h_vmem = '30.0G'
+      #current_job.express = 'True'
 
 
-      print "job #1: ", j1.nativeSpecification
+      print "job #1: ", current_job.nativeSpecification
 
 
-      jobs.append(j1)
+      jobs.append(current_job)
 
 
-  return jobs
+   return jobs
 
 
 def create_and_submit():
 
 
 def create_and_submit():
@@ -60,23 +62,37 @@ def create_and_submit():
 
    jp = os.path.join
 
 
    jp = os.path.join
 
-   run_dir = '/fml/ag-raetsch/home/fabio/tmp/newest_run/alignment/run_enable_quality_scores_+_enable_splice_signals_+_enable_intron_length_+'
+   run_dir = '/fml/ag-raetsch/home/fabio/tmp/newest_run/alignment/saved_run'
 
 
-   run   = cPickle.load(jp(run_dir,'run_obj.pickle'))
-   param = cPickle.load(jp(run_dir,'param_526.pickle'))
+   run   = cPickle.load(open(jp(run_dir,'run_obj.pickle')))
+   run['name'] = 'saved_run'
 
 
-   dataset_fn        = ''
-   prediction_keys   = ''
+   param = cPickle.load(open(jp(run_dir,'param_526.pickle')))
 
 
-   num_splits = 10
-   slices = get_slices(prediction_keys,num_splits)
-   chunks = []
-   for slice in slices:
-      chunks.append(prediction_keys[slice[0]:slice[1]])
+   dataset_fn           = '/fml/ag-raetsch/home/fabio/tmp/transcriptome_data/dataset_transcriptome_run_1.pickle'
+   prediction_keys_fn   = '/fml/ag-raetsch/home/fabio/tmp/transcriptome_data/dataset_transcriptome_run_1.keys.pickle'
+
+   prediction_keys = cPickle.load(open(prediction_keys_fn))
 
 
+   print 'Found %d keys for prediction.' % len(prediction_keys)
+
+   num_splits = 25
+   slices = get_slices(len(prediction_keys),num_splits)
+   chunks = []
+   for idx,slice in enumerate(slices):
+      c_name = 'chunk_%d' % idx
+      chunks.append((c_name,prediction_keys[slice[0]:slice[1]]))
 
    functionJobs = makeJobs(run,dataset_fn,chunks,param)
 
 
    functionJobs = makeJobs(run,dataset_fn,chunks,param)
 
+   sum = 0
+   for size in [len(elem) for name,elem in chunks]:
+      sum += size
+   
+   assert sum == len(prediction_keys)
+
+   print 'Got %d job(s)' % len(functionJobs)
+
    print "output ret field in each job before sending it onto the cluster"
    for (i, job) in enumerate(functionJobs):
       print "Job with id: ", i, "- ret: ", job.ret
    print "output ret field in each job before sending it onto the cluster"
    for (i, job) in enumerate(functionJobs):
       print "Job with id: ", i, "- ret: ", job.ret
@@ -85,21 +101,23 @@ def create_and_submit():
    print "sending function jobs to cluster"
    print ""
 
    print "sending function jobs to cluster"
    print ""
 
-   #processedFunctionJobs = processJobs(functionJobs)
+   processedFunctionJobs = processJobs(functionJobs)
 
    print "ret fields AFTER execution on cluster"
    for (i, job) in enumerate(processedFunctionJobs):
       print "Job with id: ", i, "- ret: ", job.ret
 
 
 
    print "ret fields AFTER execution on cluster"
    for (i, job) in enumerate(processedFunctionJobs):
       print "Job with id: ", i, "- ret: ", job.ret
 
 
-def predict(run,prediction_set,param):
+def g_predict(run,dataset_fn,prediction_keys,param,set_name):
    """
    """
-   
+  
    """
 
    qp = QPalma()
    """
 
    qp = QPalma()
-   qp.predict(run,dataset_fn,prediction_keys,param):
+   qp.predict(run,dataset_fn,prediction_keys,param,set_name)
+
+   return 'finished prediction of set %s.' % set_name
 
 
 if __name__ == '__main__':
 
 
 if __name__ == '__main__':
-   create_and_submit():
+   create_and_submit()