eb913fd15f82bcc4a8951c42895ae678667ad4eb
2 # -*- coding: utf-8 -*-
###############################################################################
# Load a random but fixed initial parameter vector; this makes debugging easier.
###############################################################################
# Fixed initial parameter vector, unpickled from a hard-coded path.  It is
# sliced to numFeatures entries further down when mode is
# 'using_quality_scores'.
# The file handle is closed explicitly rather than left to the garbage
# collector.
# NOTE(review): unpickling can execute arbitrary code; keep this path pointing
# at a trusted, locally produced file.
_param_file = open('/fml/ag-raetsch/home/fabio/svn/projects/QPalma/scripts/randInitParam.pickle')
try:
    fixedParamQ = cPickle.load(_param_file)
finally:
    _param_file.close()
16 ###########################################################
18 # The parameters for the QPalma algorithm
###############################################################################
# 'normal' means work like Palma; 'using_quality_scores' means work like Palma
# plus using sequencing quality scores.
###############################################################################
# Active operation mode: work like Palma and additionally use sequencing
# quality scores.
mode = 'using_quality_scores'
41 ###############################################################################
43 # When using quality scores our scoring function is defined as
47 # where S_e is {A,C,G,T,N} and S = {A,C,G,T,N,-}
49 # as opposed to a usage without quality scores when we only have
53 # The matrix of plifs is defined as follows:
56 # -------------------------
69 # so the index can be calculated as (estnum-1)*6 + dnanum.
71 # At ests we do not have gaps with quality scores so we look up the matchmatrix
73 ###############################################################################
75 #numLengthSuppPoints = 30
76 #numDonSuppPoints = 30
77 #numAccSuppPoints = 30
78 #numQualSuppPoints = 16
# Number of length support points.  It is one of the summands of numFeatures
# and is asserted to be greater than 1 by the sanity checks at the end of this
# file.
numLengthSuppPoints = 20
91 sizeMatchmatrix
= (6,6)
94 numQualPlifs
= estPlifs
*dnaPlifs
95 elif mode
== 'using_quality_scores':
96 sizeMatchmatrix
= (6,1)
99 numQualPlifs
= estPlifs
*dnaPlifs
101 assert False, 'Wrong operation mode specified'
# Total number of quality support points: one set of numQualSuppPoints for
# each of the numQualPlifs quality plifs.
totalQualSuppPoints = numQualPlifs * numQualSuppPoints

# Overall feature count: the donor, acceptor and length support points, one
# entry per cell of the match matrix, and all quality support points.  The
# fixed parameter vector is truncated to this many entries.
numFeatures = (
    numDonSuppPoints
    + numAccSuppPoints
    + numLengthSuppPoints
    + sizeMatchmatrix[0] * sizeMatchmatrix[1]
    + totalQualSuppPoints
)
109 ###############################################################################
111 # GENERAL SETTINGS CONCERNING THE SOLVER
115 ###############################################################################
# Solver setting, currently disabled.
# NOTE(review): presumably controls whether duplicate scores are dropped
# before solving - confirm against the code that reads this flag.
remove_duplicate_scores = False
123 fixedParam
= fixedParam
[:numFeatures
]
124 elif mode
== 'using_quality_scores':
125 fixedParam
= fixedParamQ
[:numFeatures
]
127 assert False, 'Wrong operation mode specified'
129 ###############################################################################
131 # DATA SETTINGS CONCERNING THE SPLITS AND FILE LOCATIONS
135 ###############################################################################
# Bounds of the prediction split.
# NOTE(review): presumably indices into the dataset, with prediction_end
# exclusive - confirm against the code that consumes these values.
prediction_begin = 2000
prediction_end = 3500
# Scratch directory and root directory of the Solexa data.
tmp_dir = '/fml/ag-raetsch/home/fabio/tmp/solexa_tmp'
data_path = '/fml/ag-raetsch/share/projects/qpalma/solexa'

# Sub-directories of data_path.
original_path = joinp(data_path, 'original_solexa_data')
annot_path = joinp(data_path, 'annotation_data')
remapped_path = joinp(data_path, 'remapped_solexa_data')

# Input locations.  dna_flat_fn ends in a slash, so despite its name it refers
# to a directory; gff_fn keeps a '%s' placeholder that must be filled in by
# the consumer (NOTE(review): presumably with the chromosome number - confirm).
dna_flat_fn = '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome/'
gff_fn = joinp(annot_path, 'TAIR7_GFF3_genes_Chr%s.gff_v1')
filtered_fn = joinp(data_path, 'allFilteredReads_04_02_2008')
remapped_fn = joinp(remapped_path, 'map_best_hit.18.unambig')

# Pickled dataset location.
dataset_fn = '/fml/ag-raetsch/home/fabio/svn/projects/QPalma/scripts/chr1_dataset.pickle'
159 ###############################################################################
163 ###############################################################################
# Sanity checks on the settings above.  They run at import time and raise
# AssertionError on a bad configuration.
# NOTE: assert statements are skipped when Python runs with -O.
assert numQualPlifs >= 0
assert numDonSuppPoints > 1
assert numAccSuppPoints > 1
assert numLengthSuppPoints > 1
assert numQualSuppPoints > 1

# Each input location gets its own message so that a failure identifies the
# missing path unambiguously.
assert os.path.exists(dna_flat_fn), 'DNA data does not exist!'
assert os.path.exists(filtered_fn), 'EST/Reads data does not exist!'
assert os.path.exists(remapped_fn), 'Remapped reads data does not exist!'