+ fixed some bugs in the negative strand lookup table
[qpalma.git] / scripts / Experiment.py
index 4d2d4ad..c414d5a 100644 (file)
@@ -15,6 +15,19 @@ from Run import *
 import pdb
 import os
 import os.path
+import cPickle
+
+def get_dataset(dataset_size,num_nodes):
+   """Return the list of parameter sets describing the prediction range.
+
+   dataset_size and num_nodes are accepted but not used by this function.
+   """
+   prediction_range = [
+      ('prediction_begin',400000),
+      ('prediction_end',440000),
+   ]
+   return [prediction_range]
+
 
 def get_dataset_splitting_instances(dataset_size,num_nodes):
    all_instances = []
@@ -40,6 +53,19 @@ def get_dataset_splitting_instances(dataset_size,num_nodes):
    return all_instances
 
 
+def get_training_instances():
+   """Return the list of parameter sets used to create training runs.
+
+   Currently a single set in which every listed feature flag is True.
+   """
+   feature_flags = (
+      'enable_quality_scores',
+      'enable_splice_signals',
+      'enable_intron_length',
+   )
+   return [[(flag,True) for flag in feature_flags]]
+
+
 def get_scoring_instances():
    all_instances = []
 
@@ -56,43 +82,31 @@ def get_scoring_instances():
 
 
 def createRuns():
-   # specify n for n-fold cross validation
-   numFolds=5
+   # load the configuration object
+   Config = cPickle.load(open(Conf.conf_object_path))
 
    # the main directory where all results are stored
-   experiment_dir = '/fml/ag-raetsch/home/fabio/tmp/QPalma_single_run'
-
-   assert os.path.exists(experiment_dir), 'toplevel dir for experiment does not exist!'
-
+   alignment_dir   = Config['alignment_dir']
+   
    # list of regularization parameters and additional flags for different runs
    # for example:
    #  - with quality scores
    #  - without quality scores
    #
    bool2str = ['-','+']
-
-   allRuns = []
-
-   dataset_filename = '/fml/ag-raetsch/home/fabio/svn/projects/QPalma/scripts/dataset_2nd'
-
-   dataset_size = 400000
-   num_nodes = 10
-
    ctr = 1
 
-   #all_instances = get_scoring_instances()
-   all_instances = get_dataset_splitting_instances(dataset_size,num_nodes)
+   all_instances = get_training_instances()
             
+   allRuns = []
    for parameters in all_instances:
       # create a new Run object
       currentRun = Run()
-      currentName = 'run_'
+      currentName = 'run'
 
       for param_name,param in parameters:
          currentRun[param_name] = param
-         currentName += '%s_%s_' % (str(param_name),str(param))
-         print param_name,param
-         print currentName
+         currentName += '_%s_%s' % (str(param_name),str(bool2str[param]))
             
       # global settings for all runs
       currentRun['anzpath']            = Conf.anzpath
@@ -129,48 +143,36 @@ def createRuns():
       + currentRun['totalQualSuppPoints'] 
 
       # run-specific settings
-      currentRun['training_begin']        = Conf.training_begin
-      currentRun['training_end']          = Conf.training_end
-      #currentRun['prediction_begin']      = Conf.prediction_begin
-      #currentRun['prediction_end']        = Conf.prediction_end
-
       currentRun['C']                     = 100
 
       currentRun['name']                  = currentName
-      currentRun['dataset_filename']      = dataset_filename
-      currentRun['experiment_path']       = experiment_dir
+      currentRun['alignment_dir']         = alignment_dir
 
-      currentRun['min_intron_len'] = 20
-      currentRun['max_intron_len'] = 2000
+      currentRun['min_intron_len']        = 20
+      currentRun['max_intron_len']        = 2000
 
-      currentRun['min_svm_score'] = 0.0 
-      currentRun['max_svm_score'] = 1.0
+      currentRun['min_svm_score']         = 0.0 
+      currentRun['max_svm_score']         = 1.0
 
-      currentRun['min_qual'] = -5
-      currentRun['max_qual'] = 40
+      currentRun['min_qual']              = -5
+      currentRun['max_qual']              = 40
 
-      currentRun['dna_flat_files']      = Conf.dna_flat_fn
+      currentRun['dna_flat_files']        = Conf.dna_flat_fn
 
       currentRun['id']      = ctr
       ctr += 1
 
       allRuns.append(currentRun)
 
-
 ###############################################################################
 #
 # check for valid paths / options etc
 #
 ###############################################################################
 
-
    for currentRun in allRuns:
 
       assert 0 < currentRun['anzpath'] < 100
-      assert 0 <= currentRun['training_begin'] < currentRun['training_end']
-      assert currentRun['training_begin'] < currentRun['training_end'] 
-      assert currentRun['prediction_begin'] < currentRun['prediction_end']
-
       assert currentRun['iter_steps']
 
       #assert currentRun['matchmatrixCols']
@@ -202,8 +204,7 @@ def createRuns():
       assert currentRun['enable_intron_length']  in [True,False]
 
       #assert currentRun['totalQualSuppPoints']
-      assert os.path.exists(currentRun['dataset_filename'])
-      assert os.path.exists(currentRun['experiment_path'])
+      assert os.path.exists(currentRun['alignment_dir'])
 
    return allRuns