OBJS = $(SRCS:%.cpp=%.o)
-#CXXFLAGS=-O3 -fPIC -g -ggdb
-CXXFLAGS=-Wall -std=c++98 -ggdb -O3 -fPIC -I/usr/include/python2.5
-LDFLAGS=-lprofiler -L/fml/ag-raetsch/home/fabio/own_libs/libunwind/lib -lunwind-x86_64 -lunwind
+PY_INCL=`python-config --includes`
+PY_LIBS=`python-config --libs`
+CXXFLAGS=-Wall -std=c++98 -ggdb -O3 -fPIC $(PY_INCL)
IF=QPalmaDP
all: $(OBJS) $(HDRS)
+ @ echo $(PY_INCL) $(PY_LIBS)
@ swig -c++ -python ${IF}.i
- @ g++ $(CXXFLAGS) -I/usr/include/python2.5 -c ${IF}_wrap.cxx -o ${IF}_wrap.o
- @ g++ $(CXXFLAGS) -shared -lpython2.5 $(OBJS) ${IF}_wrap.o -o _${IF}.so
+ @ g++ $(CXXFLAGS) -c ${IF}_wrap.cxx -o ${IF}_wrap.o
+ @ g++ $(CXXFLAGS) -shared $(PY_LIBS) $(OBJS) ${IF}_wrap.o -o _${IF}.so
@ python -c "import ${IF}"
-test: $(OBJS) $(HDRS)
- g++ $(CXXFLAGS) $(LDFLAGS) -o test_fm debug_tools.o Mathmatics.o io.o penalty_info.o fill_matrix.o test_fill_matrix.cpp
-
clean:
@ rm *.o *.so *.cxx ${IF}.py ${IF}.pyc
penalty_struct currentPlif;
int ctr=0;
for (size_t z=0; z<nr_paths; z++) {
- //for (int z=0; z<nr_paths; z++) {
for(int estChar=1;estChar<6;estChar++) {
for(int dnaChar=0;dnaChar<6;dnaChar++) {
py_splice_align, py_est_align, py_mmatrix,
py_align_scores, py_q_scores);
- //PyTuple_SetItem(result,0,py_splice_align);
- //PyTuple_SetItem(result,1,py_est_align);
- //PyTuple_SetItem(result,2,py_mmatrix);
- //PyTuple_SetItem(result,3,py_align_scores);
- //PyTuple_SetItem(result,4,py_q_scores);
-
return result;
}
PROJ=ParaParser
-#CXXFLAGS=-Wall -Wshadow -std=c++98 -O3 -ggdb -fPIC -I/usr/include/python2.5
-CXXFLAGS=-Wall -Wshadow -std=c++98 -O3 -fPIC -I/usr/include/python2.5
+PY_INCL=`python-config --includes`
+PY_LIBS=`python-config --libs`
+
+CXXFLAGS=-Wall -Wshadow -std=c++98 -O3 -fPIC $(PY_INCL)
SRCS= ParaParser.cpp
all: $(OBJS)
@ echo "Building ParaParser module"
@ swig -c++ -python ${PROJ}.i
- @ g++ $(CXXFLAGS) -I/usr/include/python2.5 -c ${PROJ}_wrap.cxx -o ${PROJ}_wrap.o
- @ g++ $(CXXFLAGS) -shared -lpython2.5 $(OBJS) ${PROJ}_wrap.o -o _${PROJ}.so
+ @ g++ $(CXXFLAGS) -c ${PROJ}_wrap.cxx -o ${PROJ}_wrap.o
+ @ g++ $(CXXFLAGS) -shared $(PY_LIBS) $(OBJS) ${PROJ}_wrap.o -o _${PROJ}.so
@ python -c "import ${PROJ}" && echo "Creation of ParaParser module was successful!"
clean:
rm *.o *.so *.cxx ${PROJ}.py ${PROJ}.pyc
-
accessWrapper = DataAccessWrapper(settings)
seqInfo = SeqSpliceInfo(accessWrapper,settings['allowed_fragments'])
- for line in open(map_file):
+ for line in open(settings['training_data_fn']):
line = line.strip()
if line.startswith('#') or line == '':
continue
dna_calc = dna_calc.replace('-','')
+
#print 'right before computeSpliceWeights exampleIdx %d' % exampleIdx
# Calculate the weights
trueWeightDon, trueWeightAcc, trueWeightIntron =\
ps = h.convert2SWIG()
- newSpliceAlign, newEstAlign, newWeightMatch, newDPScores,\
- newQualityPlifsFeatures, unneeded1, unneeded2 =\
+ newSpliceAlign, newEstAlign, newWeightMatch, newDPScores, newQualityPlifsFeatures, unused1, unused2 =\
performAlignment(dna,read,quality,mmatrix,donor,acceptor,ps,qualityPlifs,num_path[exampleIdx],False,settings)
+
mm_len = settings['matchmatrixRows']*settings['matchmatrixCols']
# check for correct reshaping !!
if False:
self.plog("Is considered as: %d\n" % true_map[1])
-
- #result_len = currentAlignment.getResultLength()
-
dna_array,est_array = currentAlignment.getAlignmentArraysNew()
-
_newSpliceAlign = newSpliceAlign[0].flatten().tolist()[0]
_newEstAlign = newEstAlign[0].flatten().tolist()[0]
# call solver every nth example / added constraint
#if exampleIdx != 0 and exampleIdx % numConstPerRound == 0:
+ if True:
objValue,w,self.slacks = solver.solve()
solver_call_ctr += 1
from qpalma.utils import logwrite
-Errormsg = """Usage is: python qpalma_pipeline.py predict|train <config filename>"""
+Errormsg = """Usage is:
+
+ python qpalma_pipeline.py train <config filename> <training data filename>
+ or
+ python qpalma_pipeline.py predict <config filename> <parameter filename> <putative unspliced reads filename> <putative spliced reads filename>
+
+ """
class System:
logwrite('Parsed settings system set up.',self.settings)
- def training(self):
+ def training(self, training_data_fn):
"""
This function is responsible for the whole training process. It first
converts the data to the right format needed by QPalma for the training
print '\t\t\tStarting approximation...\n'
print '#'*80
- # When we are given only genomic reads we first generate artificially spliced
- # ones in order to generate a training set
- pre_task = TrainingPreprocessingTask(self.settings)
- pre_task.createJobs()
- pre_task.submit()
- pre_task.checkIfTaskFinished()
+ self.settings['training_data_fn'] = training_data_fn
# Collect the data and create a pickled training set
generateTrainingDataset(self.settings)
logwrite('End of training.\n',self.settings)
- def prediction(self):
+ def prediction(self, param_fn, unspliced_reads_fn, spliced_reads_fn):
"""
This function encapsulates all steps needed to perform a prediction. Given
the parameter of the training and paths to a prediction set it will
print '\t\t\tStarting approximation...\n'
print '#'*80
+ self.settings['prediction_param_fn'] = param_fn
+ self.settings['unspliced_reads_fn'] = unspliced_reads_fn
+ self.settings['spliced_reads_fn'] = spliced_reads_fn
+
# Before creating a candidate spliced read dataset we have to first filter
# the matches from the first seed finding run.
if __name__ == '__main__':
+ if len(sys.argv) != 4 or len(sys.argv) != 6:
+ print Errormsg
+ sys.exit(1)
+
mode = sys.argv[1]
- assert mode in ['predict','train'], Errormsg
+ if not mode in ['predict','train']:
+ print Errormsg
+ sys.exit(1)
+
filename = sys.argv[2]
- assert os.path.exists(filename), Errormsg
+ if not os.path.exists(filename):
+ print Errormsg
+ sys.exit(1)
# creating system object
system_obj = System(filename)
if mode == 'predict':
- system_obj.prediction()
+ param_fn = sys.argv[3]
+ unspliced_reads_fn = sys.argv[4]
+ spliced_reads_fn = sys.argv[5]
+ system_obj.prediction(unspliced_reads_fn, spliced_reads_fn)
elif mode == 'train':
- system_obj.training()
+ training_data_fn = sys.argv[3]
+ system_obj.training(training_data_fn)
else:
assert False
-
- #spliced_reads_fn
- #unspliced_reads_fn
- #prediction_param_fn
exons = numpy.mat([1660,1687,1774,1801]).reshape((2,2))
- #dataset[id] = (currentSeqInfo,originalRead,currentQualities,exons)
+ dataset[id] = (currentSeqInfo,originalRead,currentQualities,exons)
return dataset
numExons = int(math.ceil(len(predExons) / 2))
print alignment_reconstruct(current_prediction,numExons)
- print id,start_pos,predExons
+ print id
+ print [e + start_pos for e in predExons]
## chr1 + 20-120
#read = 'catctatgcaacagcattacagtgatcaccggcccaaaaaacctgtgtctggggttttgcctgatgatagcagtgatactgaaactggatcaatggtaag'
print 'Problem counter is %d' % qp.problem_ctr
-
-
- def _testAlignments(self):
- run_dir = '/fml/ag-raetsch/home/fabio/tmp/newest_run/alignment/saved_run'
-
- run = cPickle.load(open(jp(run_dir,'run_obj.pickle')))
- run['name'] = 'test_run'
- run['result_dir'] = '/fml/ag-raetsch/home/fabio/tmp/sandbox/testcases'
-
- param_fn = jp(run_dir,'param_526.pickle')
- param = cPickle.load(open(param_fn))
-
- print self.prediction_set
- for example_key in self.prediction_set.keys():
- print 'Current example %d' % example_key
-
- for example in self.prediction_set[example_key]:
- print example
- print 'size'
- print len(example)
-
- # fetch the data needed
- settings = {}
-
- settings['genome_dir'] = '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome'
- settings['acceptor_scores_loc'] = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/acc'
- settings['donor_scores_loc'] = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/don'
-
- settings['genome_file_fmt'] = 'chr%d.dna.flat'
- settings['splice_score_file_fmt']= 'contig_%d%s'
-
- allowed_fragments = [1]
- settings['allowed_fragments'] = allowed_fragments
-
- accessWrapper = DataAccessWrapper(settings)
- seqInfo = SeqSpliceInfo(accessWrapper,allowed_fragments)
-
- qp = QPalma(run,seqInfo,True)
- allPredictions = qp.predict(self.prediction_set,param)
-
- for current_prediction in allPredictions:
- align_str = print_prediction(current_prediction)
- print align_str
-
- id = current_prediction['id']
- seq = current_prediction['read']
- dna = current_prediction['dna']
- chromo = current_prediction['chr']
- strand = current_prediction['strand']
- start_pos = current_prediction['start_pos']
- predExons = current_prediction['predExons']
-
- numExons = int(math.ceil(len(predExons) / 2))
-
- print alignment_reconstruct(current_prediction,numExons)
- print id,start_pos,predExons
-
- print 'Problem counter is %d' % qp.problem_ctr
-
-
if __name__ == '__main__':
train_suite = unittest.TestLoader().loadTestsFromTestCase(TestQPalmaTraining)
predict_suite = unittest.TestLoader().loadTestsFromTestCase(TestQPalmaPrediction)
- all_suites = unittest.TestSuite([train_suite, predict_suite])
+ #all_suites = unittest.TestSuite([train_suite, predict_suite])
+ all_suites = unittest.TestSuite([train_suite])
unittest.TextTestRunner(verbosity=2).run(all_suites)