OBJS = $(SRCS:%.cpp=%.o)
-PY_INCL=`python-config --includes`
-PY_LIBS=`python-config --libs`
+PY_INCL=`python-config --cflags`
+PY_LIBS=`python-config --ldflags`
CXXFLAGS=-Wall -std=c++98 -ggdb -O3 -fPIC $(PY_INCL)
PROJ=ParaParser
-PY_INCL=`python-config --includes`
-PY_LIBS=`python-config --libs`
+PY_INCL=`python-config --cflags`
+PY_LIBS=`python-config --ldflags`
CXXFLAGS=-Wall -Wshadow -std=c++98 -O3 -fPIC $(PY_INCL)
\end{center}
and ``\#'' for lines containing comments.
-
\subsection{Read format and internal representation}
The read input files for \QP contain the read sequences with their quality as
\item unique read id
\item chromosome/contig id
\item position of match in chromosome/contig (0-based, relative to positive strand)
-\item strand
+\item strand [D/P or +/-]
\item read sequence (in strand specific direction)
\item read quality (in strand specific direction)
\end{enumerate}
accessWrapper = DataAccessWrapper(settings)
seqInfo = SeqSpliceInfo(accessWrapper,settings['allowed_fragments'])
- for line in open(settings['training_data_fn']):
+ for line in open(settings['training_reads_fn']):
line = line.strip()
if line.startswith('#') or line == '':
continue
assert checkExons(dna,relative_exons,readAlignment,id)
- currentSeqInfo = (id,chromo)
+ currentSeqInfo = (id,chromo,strand,seqBeginning,seqEnd)
+
+ dataset[id] = (currentSeqInfo,readAlignment,[prb],exons)
- dataset.setdefault(id, []).append((currentSeqInfo,readAlignment,[prb],exons))
+ # NOTE: each id now maps to a single entry; the former per-id list built via dataset.setdefault(id, []).append(...) is no longer used.
saveData('training',dataset,settings)
num_splits = self.settings['num_splits']
- jp = os.path.join
-
dataset_fn = self.settings['prediction_dataset_fn']
prediction_keys_fn = self.settings['prediction_dataset_keys_fn']
"""
- jp = os.path.join
-
dataset_fn = self.settings['training_dataset_fn']
- training_keys = cPickle.load(open(self.settings['training_dataset_keys_fn']))
-
- print 'Found %d keys for training.' % len(training_keys)
set_name = 'training_set'
- current_job = KybJob(gridtools.AlignmentTaskStarter,[self.settings,dataset_fn,training_keys,set_name])
+ current_job = KybJob(gridtools.TrainingTaskStarter,[dataset_fn,self.settings,set_name])
current_job.h_vmem = '2.0G'
current_job.express = 'True'
pass
-def TrainingTaskStarter(settings,dataset_fn,training_keys,set_name):
+def TrainingTaskStarter(dataset_fn,settings,set_name):
accessWrapper = DataAccessWrapper(settings)
seqInfo = SeqSpliceInfo(accessWrapper,settings['allowed_fragments'])
qp = QPalma(seqInfo)
- qp.init_training(dataset_fn,training_keys,settings,set_name)
+ qp.init_training(dataset_fn,settings,set_name)
return 'finished prediction of set %s.' % set_name
print string
- def init_training(self,dataset_fn,training_keys,settings,set_name):
- full_working_path = jp(settings['training_dir'],run_name)
+ def init_training(self,dataset_fn,settings,set_name):
+ full_working_path = jp(settings['training_dir'],set_name)
#assert not os.path.exists(full_working_path)
if not os.path.exists(full_working_path):
printMessage('Starting dataset generation')
- self.settings['training_data_fn'] = training_data_fn
+ self.settings['training_reads_fn'] = training_data_fn
# Collect the data and create a pickled training set
generateTrainingDataset(self.settings)