+ added minor functionality pprinting ...
[qpalma.git] / scripts / Experiment.py
1 ###############################################################################
2 #
3 # This file contains settings for one experiment
4 #
5 # The general idea is as follows:
6 #
7 # Suppose you have a machine learning algorithm you want to perform model
8 # selection with. Then, for each different value of, for example, C for a C-SVM,
9 # this script generates a Run object (a subclass of dict) storing the parameters.
10 #
11 ###############################################################################
12
13 import qpalma.Configuration as Conf
14 from Run import *
15 import pdb
16 import os
17 import os.path
18
def _checkRun(run):
    """Sanity-check the settings stored in one run.

    `run` is a dict-like object as filled in by createRuns().  The checks are
    plain assert statements, so the first violated one raises AssertionError
    (and a missing key raises KeyError).  Note that asserts are skipped
    entirely when Python is started with -O.
    """
    assert 0 < run['anzpath'] < 100, 'anzpath out of range'
    # the chained comparison already guarantees dataset_begin < dataset_end
    assert 0 < run['dataset_begin'] < run['dataset_end'], \
        'need 0 < dataset_begin < dataset_end'

    assert run['iter_steps'], 'iter_steps must be set'

    #assert run['matchmatrixCols']
    #assert run['matchmatrixRows']

    assert run['mode'] in ['normal', 'using_quality_scores'], 'unknown mode'

    #assert run['numConstraintsPerRound']

    assert 0 < run['numFeatures'] < 10000, 'numFeatures out of range'

    #assert run['numQualPlifs']
    #assert run['numQualSuppPoints']

    assert run['print_matrix'] in [True, False]
    assert 0 < run['read_size'] < 100, 'read_size out of range'
    assert run['remove_duplicate_scores'] in [True, False]

    assert run['enable_quality_scores'] in [True, False]
    assert run['enable_splice_signals'] in [True, False]
    assert run['enable_intron_length'] in [True, False]

    #assert run['totalQualSuppPoints']
    assert os.path.exists(run['dataset_filename']), \
        'dataset file does not exist: %s' % run['dataset_filename']


def createRuns(experiment_dir='/fml/ag-raetsch/home/fabio/tmp/QPalma',
               dataset_filename='/fml/ag-raetsch/home/fabio/svn/projects/QPalma/scripts/chr1_dataset.pickle'):
    """Create one Run object per combination of experiment flags.

    A run is generated for every combination of the quality-score flag and
    the splice-signal flag (each True and False); the intron-length flag is
    currently always True.  Every run receives the global settings from
    qpalma.Configuration plus its run-specific settings and a name that
    encodes the flag combination.

    Parameters:
        experiment_dir   -- main directory where all results are stored; it
                            must already exist.
        dataset_filename -- path of the dataset file stored in every run; it
                            must exist.

    Returns the list of all created runs.

    Raises AssertionError if experiment_dir does not exist or if any run
    fails the checks in _checkRun().
    """
    assert os.path.exists(experiment_dir), 'toplevel dir for experiment does not exist!'

    # list of regularization parameters and additional flags for different runs
    # for example:
    #  - with quality scores
    #  - without quality scores
    #
    # maps a flag to the marker used in the run name (False -> '-', True -> '+')
    bool2str = ['-', '+']

    allRuns = []

    for QFlag in [True, False]:
        for SSFlag in [True, False]:
            for ILFlag in [True]:

                # create a new Run object
                currentRun = Run()

                # global settings for all runs
                currentRun['anzpath'] = Conf.anzpath
                currentRun['iter_steps'] = Conf.iter_steps
                currentRun['matchmatrixRows'] = Conf.sizeMatchmatrix[0]
                currentRun['matchmatrixCols'] = Conf.sizeMatchmatrix[1]
                currentRun['mode'] = Conf.mode
                currentRun['numFeatures'] = Conf.numFeatures
                currentRun['numConstraintsPerRound'] = Conf.numConstraintsPerRound

                currentRun['remove_duplicate_scores'] = Conf.remove_duplicate_scores
                currentRun['print_matrix'] = Conf.print_matrix
                currentRun['read_size'] = Conf.read_size

                currentRun['numQualPlifs'] = Conf.numQualPlifs
                currentRun['numQualSuppPoints'] = Conf.numQualSuppPoints
                currentRun['totalQualSuppPoints'] = Conf.totalQualSuppPoints

                # run-specific settings
                currentRun['dataset_begin'] = 10
                currentRun['dataset_end'] = 20

                currentRun['enable_quality_scores'] = QFlag
                currentRun['enable_splice_signals'] = SSFlag
                currentRun['enable_intron_length'] = ILFlag

                # e.g. '+_quality_-_splicesignals_+_intron_len'
                currentName = '%s_quality_%s_splicesignals_%s_intron_len' % \
                    (bool2str[QFlag], bool2str[SSFlag], bool2str[ILFlag])

                currentRun['name'] = currentName

                currentRun['dataset_filename'] = dataset_filename

                allRuns.append(currentRun)

    #
    # check for valid paths / options etc
    #
    for currentRun in allRuns:
        _checkRun(currentRun)

    return allRuns
116
# When executed as a script: build all runs, then drop into the debugger so
# that the resulting list (allRuns) can be inspected interactively.
if __name__ == '__main__':
    allRuns = createRuns()
    pdb.set_trace()
120