+ added basic scripts to generate runs for an experiment
[qpalma.git] / scripts / Experiment.py
###############################################################################
#
# This file contains the settings for one experiment.
#
# The general idea is as follows:
#
# Suppose you have a machine learning algorithm for which you want to perform
# model selection. Then, for each different value of a parameter (for example
# C for a C-SVM), this script generates a Run object -- a subclass of dict --
# storing the parameters.
#
###############################################################################
12
13 import qpalma.Configuration as Conf
14 from Run import *
15
def createRuns(dataset_begin=None, dataset_end=None):
    """Create and validate the Run objects for one experiment.

    One Run is created for every combination of the quality-score,
    splice-signal and intron-length flags. Every Run receives the global
    settings read from ``Conf``, its own flag values, the dataset range and
    a name derived from the flags.

    Args:
        dataset_begin: start of the dataset range stored in every run.
            Must satisfy ``0 < dataset_begin < dataset_end``.
        dataset_end: end of the dataset range stored in every run.

    Returns:
        The list of validated Run objects.

    Raises:
        AssertionError: if the experiment directory does not exist, if the
            dataset range is not specified, or if a run holds an invalid
            setting.
    """
    # Imported locally: the module header does not import os explicitly.
    import os

    # specify n for n-fold cross validation (not used by this function yet)
    numFolds = 5

    # the main directory where all results are stored
    experiment_dir = '/fml/ag-raetsch/home/fabio/tmp/QPalma'

    assert os.path.exists(experiment_dir), 'toplevel dir for experiment does not exist!'

    # NOTE(review): the original code left both dataset values unassigned.
    # They are taken from the arguments and, if omitted, from attributes of
    # the same name on Conf when those exist -- confirm the intended source.
    if dataset_begin is None:
        dataset_begin = getattr(Conf, 'dataset_begin', None)
    if dataset_end is None:
        dataset_end = getattr(Conf, 'dataset_end', None)

    assert dataset_begin is not None and dataset_end is not None, \
        'dataset_begin and dataset_end have to be specified!'

    # list of regularization parameters and additional flags for different runs
    # for example:
    #  - with quality scores
    #  - without quality scores
    #
    # indexed with a bool: False -> '-', True -> '+'
    bool2str = ['-', '+']

    allRuns = []

    for QFlag in [True, False]:
        for SSFlag in [True, False]:
            for ILFlag in [True]:

                # create a new Run object
                currentRun = Run()

                # global settings for all runs
                currentRun['anzpath'] = Conf.anzpath
                currentRun['iter_steps'] = Conf.iter_steps
                currentRun['matchmatrixRows'] = Conf.sizeMatchmatrix[0]
                currentRun['matchmatrixCols'] = Conf.sizeMatchmatrix[1]
                currentRun['mode'] = Conf.mode
                currentRun['numFeatures'] = Conf.numFeatures
                currentRun['numConstraintsPerRound'] = Conf.numConstraintsPerRound

                currentRun['print_matrix'] = Conf.print_matrix
                currentRun['read_size'] = Conf.read_size
                currentRun['remove_duplicate_scores'] = Conf.remove_duplicate_scores

                currentRun['numQualPlifs'] = Conf.numQualPlifs
                currentRun['numQualSuppPoints'] = Conf.numQualSuppPoints
                currentRun['totalQualSuppPoints'] = Conf.totalQualSuppPoints

                # run-specific settings
                currentRun['dataset_begin'] = dataset_begin
                currentRun['dataset_end'] = dataset_end

                currentRun['enable_quality_scores'] = QFlag
                currentRun['enable_splice_signals'] = SSFlag
                currentRun['enable_intron_length'] = ILFlag

                currentName = '%s_quality_%s_splicesignals_%s_intron_len' % \
                    (bool2str[QFlag], bool2str[SSFlag], bool2str[ILFlag])

                currentRun['name'] = currentName

                allRuns.append(currentRun)

    #
    # check for valid paths / options etc
    #
    for currentRun in allRuns:
        _validateRun(currentRun)

    return allRuns


def _validateRun(currentRun):
    """Assert that the settings stored in one run are within valid bounds."""
    assert 0 < currentRun['anzpath'] < 100
    assert 0 < currentRun['dataset_begin'] < currentRun['dataset_end']

    assert currentRun['iter_steps']

    #assert currentRun['matchmatrixCols']
    #assert currentRun['matchmatrixRows']

    assert currentRun['mode'] in ['normal', 'using_quality_scores']

    #assert currentRun['numConstraintsPerRound']

    assert 0 < currentRun['numFeatures'] < 10000

    #assert currentRun['numQualPlifs']
    #assert currentRun['numQualSuppPoints']

    assert currentRun['print_matrix'] in [True, False]
    assert 0 < currentRun['read_size'] < 100
    assert currentRun['remove_duplicate_scores'] in [True, False]

    # The original checked the never-assigned key 'disable_quality_scores'
    # without an assert; the flags actually stored on a run are checked here.
    assert currentRun['enable_quality_scores'] in [True, False]
    assert currentRun['enable_splice_signals'] in [True, False]
    assert currentRun['enable_intron_length'] in [True, False]

    #assert currentRun['totalQualSuppPoints']
104
105
if __name__ == "__main__":
    # Executed as a script: build and check all runs of the experiment.
    createRuns()
108