+ added sample configuration file
authorFabio <fabio@congo.fml.local>
Thu, 2 Oct 2008 13:50:07 +0000 (15:50 +0200)
committerFabio <fabio@congo.fml.local>
Thu, 2 Oct 2008 13:50:07 +0000 (15:50 +0200)
+ minor modifications in the dataset utils

qpalma/DatasetUtils.py
scripts/qpalma_pipeline.py
test.conf [new file with mode: 0644]

index 1d90744..0da50fa 100644 (file)
@@ -60,8 +60,7 @@ def generatePredictionDataset(settings):
 
    dataset = {}
 
-   prb_offset = 64
-   #prb_offset = 50
+   prb_offset = settings['prb_offset']
 
    # This tuple specifies an interval for valid Illumina Genome Analyzer quality values
    if settings['platform'] == 'IGA':
index 20ed7b4..cffccc4 100644 (file)
@@ -99,10 +99,10 @@ class System:
       # Before creating a candidate spliced read dataset we have to first filter
       # the matches from the first seed finding run.
 
-      #approx_task = ApproximationTask(self.settings)
-      #approx_task.CreateJobs()
-      #approx_task.Submit()
-      #approx_task.CheckIfTaskFinished()
+      approx_task = ApproximationTask(self.settings)
+      approx_task.CreateJobs()
+      approx_task.Submit()
+      approx_task.CheckIfTaskFinished()
       
       # After filtering combine the filtered matches from the first run and the
       # found matches from the second run to a full dataset
@@ -111,17 +111,17 @@ class System:
       print '\t\t\tStarting dataset generation...\n'
       print '#'*80
 
-      #generatePredictionDataset(self.settings)
+      generatePredictionDataset(self.settings)
 
       print '#'*80
       print '\t\t\tStarting alignments...\n'
       print '#'*80
 
       # Now that we have a dataset we can perform accurate alignments
-      #align_task = AlignmentTask(self.settings)
-      #align_task.CreateJobs()
-      #align_task.Submit()
-      #align_task.CheckIfTaskFinished()
+      align_task = AlignmentTask(self.settings)
+      align_task.CreateJobs()
+      align_task.Submit()
+      align_task.CheckIfTaskFinished()
 
       print '#'*80
       print '\t\t\tPostprocessing...\n'
diff --git a/test.conf b/test.conf
new file mode 100644 (file)
index 0000000..3ca1d96
--- /dev/null
+++ b/test.conf
@@ -0,0 +1,116 @@
+#
+# Global settings
+#
+
+
+#
+# Set this variable to True if you want to allow QPalma to perform more extensive checks
+#
+
+perform_checks = True
+
+#
+# You can set the platform.
+# Currently supported:
+#
+# - Set IGA for Illumina Genome Analyzer
+# - Set 454 for the Roche 454 platform (not implemented yet)
+
+platform = IGA
+
+#
+# This variable stores the path to the toplevel result directory 
+# 
+
+result_dir = /fml/ag-raetsch/home/fabio/tmp/sandbox/first_test
+
+#
+# These variables store the filenames of the raw read data
+#
+
+spliced_reads_fn = /fml/ag-raetsch/home/fabio/tmp/sandbox/map_2nd.vm
+
+unspliced_reads_fn = /fml/ag-raetsch/home/fabio/tmp/sandbox/map.vm
+
+#
+# You can specify how many nodes you want to use via this variable
+#
+
+num_splits = 5
+
+#
+# Specify here the filename of the global logfile QPalma uses to report everything
+#
+
+global_log_fn = /fml/ag-raetsch/home/fabio/tmp/sandbox/first_test/qpalma.log
+
+#
+# The parameter file you want to use for prediction:
+#
+
+prediction_parameter_fn = /fml/ag-raetsch/home/fabio/tmp/newest_run/alignment/run_enable_quality_scores_+_enable_splice_signals_+_enable_intron_length_+/param_526.pickle
+
+#
+# The run object, which should become obsolete at some point but is still needed for now
+#
+
+run_fn = /fml/ag-raetsch/home/fabio/tmp/newest_run/alignment/run_enable_quality_scores_+_enable_splice_signals_+_enable_intron_length_+/run_obj.pickle
+
+#
+# In order to align short reads QPalma relies on additional information such as
+# splice site score prediction.
+#
+
+genome_dir           = /fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome
+acceptor_scores_loc  = /fml/ag-raetsch/home/fabio/tmp/interval_query_files/acc
+donor_scores_loc    = /fml/ag-raetsch/home/fabio/tmp/interval_query_files/don
+
+genome_file_fmt      = chr%d.dna.flat
+splice_score_file_fmt= contig_%d%s
+
+#allowed_fragments = [1,2,3,4,5]
+allowed_fragments = [1]
+
+
+#
+# Settings concerning the prediction algorithm
+#
+
+#
+# 
+#
+
+
+
+#
+# This defines the half window size
+#
+
+half_window_size = 1500
+
+#
+# Settings concerning the training algorithm
+#
+#
+   
+numLengthSuppPoints  = 10
+numDonSuppPoints     = 10
+numAccSuppPoints     = 10
+matchmatrixRows      = 10
+matchmatrixCols      = 10
+numQualSuppPoints    = 10
+totalQualSuppPoints  = 10
+
+optimizer = MOSEK
+optimizer = CPLEX
+optimizer = CVXOPT
+
+#
+# BLAT, ShoRe and mGene
+#
+
+output_format = BLAT
+
+
+#prb_offset = 50
+prb_offset = 64