+ update makefiles to automatically fetch valid Python includes and libs
authorFabio <fabio@congo.fml.local>
Thu, 23 Oct 2008 09:05:47 +0000 (11:05 +0200)
committerFabio <fabio@congo.fml.local>
Thu, 23 Oct 2008 09:05:47 +0000 (11:05 +0200)
DynProg/Makefile
ParaParser/Makefile
doc/qpalma-manual.tex
qpalma/DatasetUtils.py
qpalma/gridtools.py
qpalma/qpalma_main.py
scripts/qpalma_pipeline.py

index d025098..b701705 100644 (file)
@@ -18,8 +18,8 @@ HDRS= Mathmatics.h\
 
 OBJS = $(SRCS:%.cpp=%.o)
 
-PY_INCL=`python-config --includes`
-PY_LIBS=`python-config --libs`
+PY_INCL=`python-config --cflags`
+PY_LIBS=`python-config --ldflags`
 
 CXXFLAGS=-Wall -std=c++98 -ggdb -O3 -fPIC $(PY_INCL)
 
index 3c20980..19061fa 100644 (file)
@@ -1,7 +1,7 @@
 PROJ=ParaParser
 
-PY_INCL=`python-config --includes`
-PY_LIBS=`python-config --libs`
+PY_INCL=`python-config --cflags`
+PY_LIBS=`python-config --ldflags`
 
 CXXFLAGS=-Wall -Wshadow -std=c++98 -O3 -fPIC $(PY_INCL)
 
index 2bbbe87..1fd7255 100644 (file)
@@ -251,7 +251,6 @@ key = value
 \end{center}
 and ``\#'' for lines containing comments.
 
-
 \subsection{Read format and internal representation}
 
 The read input files for \QP contain the read sequences with their quality as
@@ -263,7 +262,7 @@ corresponds to one short read. Each line has six tab-separated entries, namely:
 \item unique read id
 \item chromosome/contig id
 \item position of match in chromosome/contig (0-based, relative to positive strand)
-\item strand 
+\item strand [D/P or +/-]
 \item read sequence (in strand specific direction)
 \item read quality (in strand specific direction)
 \end{enumerate}
index f4db9d8..d16eb3c 100644 (file)
@@ -84,7 +84,7 @@ def generateTrainingDataset(settings):
    accessWrapper = DataAccessWrapper(settings)
    seqInfo = SeqSpliceInfo(accessWrapper,settings['allowed_fragments'])
 
-   for line in open(settings['training_data_fn']):
+   for line in open(settings['training_reads_fn']):
       line = line.strip()
       if line.startswith('#') or line == '':
          continue
@@ -130,9 +130,11 @@ def generateTrainingDataset(settings):
 
       assert checkExons(dna,relative_exons,readAlignment,id)
 
-      currentSeqInfo = (id,chromo)
+      currentSeqInfo = (id,chromo,strand,seqBeginning,seqEnd)
+
+      dataset[id] = (currentSeqInfo,readAlignment,[prb],exons)
       
-      dataset.setdefault(id, []).append((currentSeqInfo,readAlignment,[prb],exons))
+      # dataset.setdefault(id, []).append()
 
    saveData('training',dataset,settings)
 
index 57f4da3..6aeb856 100644 (file)
@@ -188,8 +188,6 @@ class AlignmentTask(ClusterTask):
 
       num_splits = self.settings['num_splits']
 
-      jp = os.path.join
-
       dataset_fn           = self.settings['prediction_dataset_fn']
       prediction_keys_fn   = self.settings['prediction_dataset_keys_fn']
 
@@ -246,16 +244,11 @@ class TrainingTask(ClusterTask):
 
       """
 
-      jp = os.path.join
-
       dataset_fn     = self.settings['training_dataset_fn']
-      training_keys  = cPickle.load(open(self.settings['training_dataset_keys_fn']))
-
-      print 'Found %d keys for training.' % len(training_keys)
 
       set_name = 'training_set'
 
-      current_job = KybJob(gridtools.AlignmentTaskStarter,[self.settings,dataset_fn,training_keys,set_name])
+      current_job = KybJob(gridtools.TrainingTaskStarter,[dataset_fn,self.settings,set_name])
       current_job.h_vmem = '2.0G'
       current_job.express = 'True'
 
@@ -270,11 +263,11 @@ class TrainingTask(ClusterTask):
       pass
 
 
-def TrainingTaskStarter(settings,dataset_fn,training_keys,set_name):
+def TrainingTaskStarter(dataset_fn,settings,set_name):
    accessWrapper = DataAccessWrapper(settings)
    seqInfo = SeqSpliceInfo(accessWrapper,settings['allowed_fragments'])
    qp = QPalma(seqInfo)
-   qp.init_training(dataset_fn,training_keys,settings,set_name)
+   qp.init_training(dataset_fn,settings,set_name)
    return 'finished prediction of set %s.' % set_name
 
 
index b856505..daf1975 100644 (file)
@@ -138,8 +138,8 @@ class QPalma:
          print string
 
 
-   def init_training(self,dataset_fn,training_keys,settings,set_name):
-      full_working_path = jp(settings['training_dir'],run_name)
+   def init_training(self,dataset_fn,settings,set_name):
+      full_working_path = jp(settings['training_dir'],set_name)
 
       #assert not os.path.exists(full_working_path)
       if not os.path.exists(full_working_path):
index 362e5f6..d0efc0c 100644 (file)
@@ -75,7 +75,7 @@ class System:
 
       printMessage('Starting dataset generation')
 
-      self.settings['training_data_fn'] = training_data_fn
+      self.settings['training_reads_fn'] = training_data_fn
 
       # Collect the data and create a pickled training set
       generateTrainingDataset(self.settings)