+ added minor pretty-printing functionality ...
authorfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Mon, 11 Feb 2008 14:23:12 +0000 (14:23 +0000)
committerfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Mon, 11 Feb 2008 14:23:12 +0000 (14:23 +0000)
git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@7782 e1793c9e-67f9-0310-80fc-b846ff1f7b36

scripts/Experiment.py
scripts/ModelSelection.py
scripts/Run.py

index e4143c2..bfb4517 100644 (file)
@@ -1,17 +1,20 @@
 ###############################################################################
 #
-# This file contains setting for one experiment
+# This file contains settings for one experiment
 #
 # The general idea is as follows:
 # 
 # Suppose you have an machine learning algorithm you want to perform model
-# selection. Then for each different value of for example C for a C-SVM this
+# selection with. Then for each different value of for example C for a C-SVM this
 # script generates a Run object a subclass of dict storing the parameters.
 #
 ###############################################################################
 
 import qpalma.Configuration as Conf
 from Run import *
+import pdb
+import os
+import os.path
 
 def createRuns():
    # specify n for n-fold cross validation
@@ -27,10 +30,12 @@ def createRuns():
    #  - with quality scores
    #  - without quality scores
    #
-   bool2str = ['-',_'+']
+   bool2str = ['-','+']
 
    allRuns = []
 
+   dataset_filename = '/fml/ag-raetsch/home/fabio/svn/projects/QPalma/scripts/chr1_dataset.pickle'
+
    for QFlag in [True,False]:
       for SSFlag in [True,False]:
          for ILFlag in [True]:
@@ -47,9 +52,9 @@ def createRuns():
             currentRun['numFeatures']        = Conf.numFeatures
             currentRun['numConstraintsPerRound'] = Conf.numConstraintsPerRound 
 
-            currentRun['print_matrix']             = Conf.print_matrix
-            0 < currentRun['read_size']            = Conf.read_size
             currentRun['remove_duplicate_scores']  = Conf.remove_duplicate_scores
+            currentRun['print_matrix']          = Conf.print_matrix
+            currentRun['read_size']             = Conf.read_size
 
             currentRun['numQualPlifs']          = Conf.numQualPlifs
             currentRun['numQualSuppPoints']     = Conf.numQualSuppPoints
@@ -57,8 +62,8 @@ def createRuns():
 
             # run-specific settings
 
-            currentRun['dataset_begin']         =
-            currentRun['dataset_end']           =
+            currentRun['dataset_begin']         = 10
+            currentRun['dataset_end']           = 20
 
             currentRun['enable_quality_scores'] = QFlag
             currentRun['enable_splice_signals'] = SSFlag
@@ -69,6 +74,8 @@ def createRuns():
 
             currentRun['name']                  = currentName
 
+            currentRun['dataset_filename']      = dataset_filename
+
             allRuns.append(currentRun)
 
    #
@@ -98,11 +105,16 @@ def createRuns():
       assert 0 < currentRun['read_size'] < 100
       assert currentRun['remove_duplicate_scores'] in [True,False]
 
-      currentRun['disable_quality_scores'] in [True,False]
+      assert currentRun['enable_quality_scores'] in [True,False]
+      assert currentRun['enable_splice_signals'] in [True,False]
+      assert currentRun['enable_intron_length']  in [True,False]
 
       #assert currentRun['totalQualSuppPoints']
+      assert os.path.exists(currentRun['dataset_filename'])
 
+   return allRuns
 
 if __name__ == '__main__':
-   createRuns()
+   allRuns = createRuns()
+   pdb.set_trace()
 
index ff4599d..a619b2a 100644 (file)
@@ -13,72 +13,26 @@ class Model:
    allInstances = []
    
    def __init__(self):
-      self.project_root = os.getcwd()
-      self.sampleDir = Configuration.sample_dir
-      self.resultRootDir = Configuration.result_dir
-      self.splits = Configuration.splits
-      self.numSplits = len(self.splits)
-      self.numFeatures = Configuration.numFeatures
-
-      assert os.path.isdir(self.sampleDir), 'Error your sample dir is not valid!!'
-      assert os.path.isdir(self.resultRootDir), 'Error your result root dir is not valid!!'
-
-      #self.possibleCs = [10**d for d in range(-2,4)]
-      self.possibleCs = [1.02]
-      self.possibleAlgos = [
-      #'noRescaling',
-      'marginRescaling'
-      #'slackRescalingWithMarginContraints',
-      #'slackRescalingLinearApprox'
-      ]
+      pass
 
    def createInstances(self):
-      os.system('rm -rf *.pickle')
-      os.system('rm -rf config*pickle')
-      instance_counter = 0
-
-      for algo in self.possibleAlgos:
-         for C in self.possibleCs:
-            resultDir = 'new_result_%.2f_%s'%(C,algo)
-            resultDir = os.path.join(self.resultRootDir,resultDir)
-            os.mkdir(resultDir)
-
-            # now iterate over all splits
-            for splitIdx in range(self.numSplits):
-               valIdx = (splitIdx+1)%self.numSplits
-               test = self.splits[splitIdx]
-               valid = self.splits[valIdx]
-               train = []
-               for j in range(self.numSplits):
-                  if j == splitIdx or j == valIdx:
-                     continue
-                  train.extend(self.splits[j])
-                  
-               os.chdir(self.project_root)
-
-               print train
-               print valid
-               print test
-               train.extend(valid)
-               print train
-
-               configObject = ConfigurationClass( instance_counter, self.numSplits, self.sampleDir,\
-               resultDir, train, test, self.numFeatures, C, algo)
-               currentInstance = LMM(configObject)
-               currentInstance = Subgradient(configObject)
-
-               #print 'instance created, starting to pickle configuration...'
-               fh = open('config_%d.pickle'%instance_counter,'w+')
-               cPickle.dump(configObject,fh)
-               fh.close()
-
-               #print 'starting to pickle algorithm object...'
-               fh = open('lmm_%d.pickle'%instance_counter,'w+')
-               cPickle.dump(currentInstance,fh)
-               fh.close()
-               instance_counter += 1 
 
-      self.numInstances = instance_counter
+      allRuns = Exp.createRuns()
+      
+      for currentRun in allRuns:
+               
+         currentInstance = QPalma(currentRun)
+         self.allInstances.append(currentInstance)
+
+         #print 'instance created, starting to pickle configuration...'
+         fh = open('config_%d.pickle'%instance_counter,'w+')
+         cPickle.dump(configObject,fh)
+         fh.close()
+
+         #print 'starting to pickle algorithm object...'
+         fh = open('lmm_%d.pickle'%instance_counter,'w+')
+         cPickle.dump(currentInstance,fh)
+         fh.close()
 
    def doSelection(self):
       for idx in range(self.numInstances):
index eadae9b..20f80f3 100644 (file)
@@ -13,7 +13,18 @@ class Run(dict):
    def __init__(self):
       pass
 
+   def __repr__(self):
+      
+      result = ""
+      for key,val in self.iteritems():
+         result += "%s : %s\n" % (key,str(val))
+
+      return result
+
 if __name__ == '__main__':
    r1 = Run()
-   r1['test'] = 12
-   print r1['test']
+   r1['attrib_1'] = 12
+   r1['attrib_2'] = 22
+
+   print "%s" % r1
+