+ fixed automatic Python path settings in Makefiles
authorFabio <fabio@congo.fml.local>
Wed, 22 Oct 2008 09:39:32 +0000 (11:39 +0200)
committerFabio <fabio@congo.fml.local>
Wed, 22 Oct 2008 09:39:32 +0000 (11:39 +0200)
DynProg/Makefile
DynProg/qpalma_dp.cpp
ParaParser/Makefile
qpalma/DatasetUtils.py
qpalma/qpalma_main.py
scripts/qpalma_pipeline.py
tests/test_qpalma.py

index 9232ee2..d025098 100644 (file)
@@ -18,21 +18,19 @@ HDRS= Mathmatics.h\
 
 OBJS = $(SRCS:%.cpp=%.o)
 
 
 OBJS = $(SRCS:%.cpp=%.o)
 
-#CXXFLAGS=-O3 -fPIC -g -ggdb 
-CXXFLAGS=-Wall -std=c++98 -ggdb -O3 -fPIC -I/usr/include/python2.5
-LDFLAGS=-lprofiler -L/fml/ag-raetsch/home/fabio/own_libs/libunwind/lib -lunwind-x86_64 -lunwind
+PY_INCL=`python-config --includes`
+PY_LIBS=`python-config --libs`
 
 
+CXXFLAGS=-Wall -std=c++98 -ggdb -O3 -fPIC $(PY_INCL)
 
 IF=QPalmaDP
 
 all: $(OBJS) $(HDRS)
 
 IF=QPalmaDP
 
 all: $(OBJS) $(HDRS)
+       @ echo $(PY_INCL) $(PY_LIBS)
        @ swig -c++ -python ${IF}.i
        @ swig -c++ -python ${IF}.i
-       @ g++ $(CXXFLAGS) -I/usr/include/python2.5 -c ${IF}_wrap.cxx -o ${IF}_wrap.o
-       @ g++ $(CXXFLAGS) -shared -lpython2.5 $(OBJS) ${IF}_wrap.o -o _${IF}.so
+       @ g++ $(CXXFLAGS) -c ${IF}_wrap.cxx -o ${IF}_wrap.o
+       @ g++ $(CXXFLAGS) -shared $(PY_LIBS) $(OBJS) ${IF}_wrap.o -o _${IF}.so
        @ python -c "import ${IF}"
 
        @ python -c "import ${IF}"
 
-test: $(OBJS) $(HDRS) 
-       g++ $(CXXFLAGS) $(LDFLAGS) -o test_fm  debug_tools.o Mathmatics.o io.o penalty_info.o fill_matrix.o test_fill_matrix.cpp 
-
 clean:
        @ rm *.o *.so *.cxx ${IF}.py ${IF}.pyc
 clean:
        @ rm *.o *.so *.cxx ${IF}.py ${IF}.pyc
index f55c1e2..1039458 100644 (file)
@@ -330,7 +330,6 @@ PyObject* Alignment::getAlignmentResultsNew() {
       penalty_struct currentPlif;
       int ctr=0;
       for (size_t z=0; z<nr_paths; z++) {
       penalty_struct currentPlif;
       int ctr=0;
       for (size_t z=0; z<nr_paths; z++) {
-      //for (int z=0; z<nr_paths; z++) {
          for(int estChar=1;estChar<6;estChar++) {
             for(int dnaChar=0;dnaChar<6;dnaChar++) {
 
          for(int estChar=1;estChar<6;estChar++) {
             for(int dnaChar=0;dnaChar<6;dnaChar++) {
 
@@ -352,12 +351,6 @@ PyObject* Alignment::getAlignmentResultsNew() {
    py_splice_align, py_est_align, py_mmatrix,
    py_align_scores, py_q_scores);
 
    py_splice_align, py_est_align, py_mmatrix,
    py_align_scores, py_q_scores);
 
-   //PyTuple_SetItem(result,0,py_splice_align);
-   //PyTuple_SetItem(result,1,py_est_align);
-   //PyTuple_SetItem(result,2,py_mmatrix);
-   //PyTuple_SetItem(result,3,py_align_scores);
-   //PyTuple_SetItem(result,4,py_q_scores);
-
    return result;
 }
 
    return result;
 }
 
index 0d06b78..3c20980 100644 (file)
@@ -1,7 +1,9 @@
 PROJ=ParaParser
 
 PROJ=ParaParser
 
-#CXXFLAGS=-Wall  -Wshadow -std=c++98 -O3 -ggdb -fPIC -I/usr/include/python2.5
-CXXFLAGS=-Wall  -Wshadow -std=c++98 -O3 -fPIC -I/usr/include/python2.5
+PY_INCL=`python-config --includes`
+PY_LIBS=`python-config --libs`
+
+CXXFLAGS=-Wall -Wshadow -std=c++98 -O3 -fPIC $(PY_INCL)
 
 SRCS= ParaParser.cpp
 
 
 SRCS= ParaParser.cpp
 
@@ -10,10 +12,9 @@ OBJS = $(SRCS:%.cpp=%.o)
 all: $(OBJS)
        @ echo "Building ParaParser module"
        @ swig -c++ -python ${PROJ}.i
 all: $(OBJS)
        @ echo "Building ParaParser module"
        @ swig -c++ -python ${PROJ}.i
-       @ g++ $(CXXFLAGS) -I/usr/include/python2.5 -c ${PROJ}_wrap.cxx -o ${PROJ}_wrap.o
-       @ g++ $(CXXFLAGS) -shared -lpython2.5 $(OBJS) ${PROJ}_wrap.o -o _${PROJ}.so
+       @ g++ $(CXXFLAGS) -c ${PROJ}_wrap.cxx -o ${PROJ}_wrap.o
+       @ g++ $(CXXFLAGS) -shared $(PY_LIBS) $(OBJS) ${PROJ}_wrap.o -o _${PROJ}.so
        @ python -c "import ${PROJ}" && echo "Creation of ParaParser module was successful!"
 
 clean:
        rm *.o *.so *.cxx ${PROJ}.py ${PROJ}.pyc
        @ python -c "import ${PROJ}" && echo "Creation of ParaParser module was successful!"
 
 clean:
        rm *.o *.so *.cxx ${PROJ}.py ${PROJ}.pyc
-
index 6c41062..4116e12 100644 (file)
@@ -79,7 +79,7 @@ def generateTrainingDataset(settings):
    accessWrapper = DataAccessWrapper(settings)
    seqInfo = SeqSpliceInfo(accessWrapper,settings['allowed_fragments'])
 
    accessWrapper = DataAccessWrapper(settings)
    seqInfo = SeqSpliceInfo(accessWrapper,settings['allowed_fragments'])
 
-   for line in open(map_file):
+   for line in open(settings['training_data_fn']):
       line = line.strip()
       if line.startswith('#') or line == '':
          continue
       line = line.strip()
       if line.startswith('#') or line == '':
          continue
index afc32d1..b856505 100644 (file)
@@ -263,6 +263,7 @@ class QPalma:
 
             dna_calc = dna_calc.replace('-','')
 
 
             dna_calc = dna_calc.replace('-','')
 
+
             #print 'right before computeSpliceWeights exampleIdx %d' % exampleIdx
             # Calculate the weights
             trueWeightDon, trueWeightAcc, trueWeightIntron =\
             #print 'right before computeSpliceWeights exampleIdx %d' % exampleIdx
             # Calculate the weights
             trueWeightDon, trueWeightAcc, trueWeightIntron =\
@@ -297,9 +298,9 @@ class QPalma:
 
             ps = h.convert2SWIG()
 
 
             ps = h.convert2SWIG()
 
-            newSpliceAlign, newEstAlign, newWeightMatch, newDPScores,\
-            newQualityPlifsFeatures, unneeded1, unneeded2 =\
+            newSpliceAlign, newEstAlign, newWeightMatch, newDPScores, newQualityPlifsFeatures, unused1, unused2 =\
             performAlignment(dna,read,quality,mmatrix,donor,acceptor,ps,qualityPlifs,num_path[exampleIdx],False,settings)
             performAlignment(dna,read,quality,mmatrix,donor,acceptor,ps,qualityPlifs,num_path[exampleIdx],False,settings)
+
             mm_len = settings['matchmatrixRows']*settings['matchmatrixCols']
 
             # check for correct reshaping !!
             mm_len = settings['matchmatrixRows']*settings['matchmatrixCols']
 
             # check for correct reshaping !!
@@ -375,11 +376,7 @@ class QPalma:
 
                if False:
                   self.plog("Is considered as: %d\n" % true_map[1])
 
                if False:
                   self.plog("Is considered as: %d\n" % true_map[1])
-
-                  #result_len = currentAlignment.getResultLength()
-
                   dna_array,est_array = currentAlignment.getAlignmentArraysNew()
                   dna_array,est_array = currentAlignment.getAlignmentArraysNew()
-
                   _newSpliceAlign = newSpliceAlign[0].flatten().tolist()[0]
                   _newEstAlign = newEstAlign[0].flatten().tolist()[0]
 
                   _newSpliceAlign = newSpliceAlign[0].flatten().tolist()[0]
                   _newEstAlign = newEstAlign[0].flatten().tolist()[0]
 
@@ -397,6 +394,7 @@ class QPalma:
 
             # call solver every nth example / added constraint
             #if exampleIdx != 0 and exampleIdx % numConstPerRound == 0:
 
             # call solver every nth example / added constraint
             #if exampleIdx != 0 and exampleIdx % numConstPerRound == 0:
+            if True:
                objValue,w,self.slacks = solver.solve()
                solver_call_ctr += 1
 
                objValue,w,self.slacks = solver.solve()
                solver_call_ctr += 1
 
index b53c7f4..8c9e501 100644 (file)
@@ -28,7 +28,13 @@ from qpalma.SettingsParser import parseSettings
 from qpalma.utils import logwrite
 
 
 from qpalma.utils import logwrite
 
 
-Errormsg = """Usage is: python qpalma_pipeline.py predict|train <config filename>"""
+Errormsg = """Usage is:  
+                  
+                  python qpalma_pipeline.py train <config filename> <training data filename>
+              or
+                  python qpalma_pipeline.py predict <config filename> <parameter filename> <putative unspliced reads filename> <putative spliced reads filename>
+                  
+           """
 
 
 class System:
 
 
 class System:
@@ -53,7 +59,7 @@ class System:
       logwrite('Parsed settings system set up.',self.settings)
 
 
       logwrite('Parsed settings system set up.',self.settings)
 
 
-   def training(self):
+   def training(self, training_data_fn):
       """
       This function is responsible for the whole training process. It first
       converts the data to the right format needed by QPalma for the training
       """
       This function is responsible for the whole training process. It first
       converts the data to the right format needed by QPalma for the training
@@ -65,12 +71,7 @@ class System:
       print '\t\t\tStarting approximation...\n'
       print '#'*80
 
       print '\t\t\tStarting approximation...\n'
       print '#'*80
 
-      # When we are given only genomic reads we first generate artificially spliced
-      # ones in order to generate a training set
-      pre_task = TrainingPreprocessingTask(self.settings)
-      pre_task.createJobs()
-      pre_task.submit() 
-      pre_task.checkIfTaskFinished()
+      self.settings['training_data_fn'] = training_data_fn
 
       # Collect the data and create a pickled training set
       generateTrainingDataset(self.settings)
 
       # Collect the data and create a pickled training set
       generateTrainingDataset(self.settings)
@@ -84,7 +85,7 @@ class System:
       logwrite('End of training.\n',self.settings)
 
 
       logwrite('End of training.\n',self.settings)
 
 
-   def prediction(self):
+   def prediction(self, param_fn, unspliced_reads_fn, spliced_reads_fn):
       """
       This function encapsulates all steps needed to perform a prediction. Given
       the parameter of the training and paths to a prediction set it will
       """
       This function encapsulates all steps needed to perform a prediction. Given
       the parameter of the training and paths to a prediction set it will
@@ -97,6 +98,10 @@ class System:
       print '\t\t\tStarting approximation...\n'
       print '#'*80
 
       print '\t\t\tStarting approximation...\n'
       print '#'*80
 
+      self.settings['prediction_param_fn'] = param_fn
+      self.settings['unspliced_reads_fn']  = unspliced_reads_fn
+      self.settings['spliced_reads_fn']    = spliced_reads_fn
+
       # Before creating a candidate spliced read dataset we have to first filter
       # the matches from the first seed finding run.
 
       # Before creating a candidate spliced read dataset we have to first filter
       # the matches from the first seed finding run.
 
@@ -139,21 +144,30 @@ class System:
    
 
 if __name__ == '__main__':
    
 
 if __name__ == '__main__':
+   if len(sys.argv) != 4 or len(sys.argv) != 6:
+      print Errormsg
+      sys.exit(1)
+
    mode     = sys.argv[1]
    mode     = sys.argv[1]
-   assert mode in ['predict','train'], Errormsg
+   if not mode in ['predict','train']:
+      print Errormsg
+      sys.exit(1)
+
    filename = sys.argv[2]
    filename = sys.argv[2]
-   assert os.path.exists(filename), Errormsg
+   if not os.path.exists(filename):
+      print Errormsg
+      sys.exit(1)
 
    # creating system object
    system_obj = System(filename)
 
    if mode == 'predict':
 
    # creating system object
    system_obj = System(filename)
 
    if mode == 'predict':
-      system_obj.prediction()
+      param_fn           = sys.argv[3]
+      unspliced_reads_fn = sys.argv[4]
+      spliced_reads_fn   = sys.argv[5]
+      system_obj.prediction(unspliced_reads_fn, spliced_reads_fn)
    elif mode == 'train':
    elif mode == 'train':
-      system_obj.training()
+      training_data_fn   = sys.argv[3]
+      system_obj.training(training_data_fn)
    else:
       assert False
    else:
       assert False
-
-   #spliced_reads_fn 
-   #unspliced_reads_fn 
-   #prediction_param_fn
index 2369f19..83a959e 100644 (file)
@@ -83,7 +83,7 @@ def createTrainingSet():
 
    exons      = numpy.mat([1660,1687,1774,1801]).reshape((2,2))
 
 
    exons      = numpy.mat([1660,1687,1774,1801]).reshape((2,2))
 
-   #dataset[id] = (currentSeqInfo,originalRead,currentQualities,exons)
+   dataset[id] = (currentSeqInfo,originalRead,currentQualities,exons)
 
    return dataset
 
 
    return dataset
 
@@ -242,7 +242,8 @@ class TestQPalmaPrediction(unittest.TestCase):
          numExons = int(math.ceil(len(predExons) / 2))
          
          print alignment_reconstruct(current_prediction,numExons)
          numExons = int(math.ceil(len(predExons) / 2))
          
          print alignment_reconstruct(current_prediction,numExons)
-         print id,start_pos,predExons
+         print id
+         print [e + start_pos for e in predExons]
 
       ## chr1 +  20-120
       #read  = 'catctatgcaacagcattacagtgatcaccggcccaaaaaacctgtgtctggggttttgcctgatgatagcagtgatactgaaactggatcaatggtaag'
 
       ## chr1 +  20-120
       #read  = 'catctatgcaacagcattacagtgatcaccggcccaaaaaacctgtgtctggggttttgcctgatgatagcagtgatactgaaactggatcaatggtaag'
@@ -322,68 +323,9 @@ class TestQPalmaPrediction(unittest.TestCase):
       print 'Problem counter is %d' % qp.problem_ctr 
 
 
       print 'Problem counter is %d' % qp.problem_ctr 
 
 
-
-
-   def _testAlignments(self):
-      run_dir = '/fml/ag-raetsch/home/fabio/tmp/newest_run/alignment/saved_run'
-
-      run   = cPickle.load(open(jp(run_dir,'run_obj.pickle')))
-      run['name'] = 'test_run'
-      run['result_dir']    = '/fml/ag-raetsch/home/fabio/tmp/sandbox/testcases'
-
-      param_fn = jp(run_dir,'param_526.pickle')
-      param = cPickle.load(open(param_fn))
-
-      print self.prediction_set
-      for example_key in self.prediction_set.keys():
-         print 'Current example %d' % example_key
-
-         for example in self.prediction_set[example_key]:
-            print example
-            print 'size'
-            print len(example)
-
-      # fetch the data needed
-      settings = {}
-
-      settings['genome_dir']           = '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome'
-      settings['acceptor_scores_loc']  = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/acc'
-      settings['donor_scores_loc']     = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/don'
-
-      settings['genome_file_fmt']      = 'chr%d.dna.flat'
-      settings['splice_score_file_fmt']= 'contig_%d%s'
-
-      allowed_fragments = [1]
-      settings['allowed_fragments'] = allowed_fragments
-
-      accessWrapper = DataAccessWrapper(settings)
-      seqInfo = SeqSpliceInfo(accessWrapper,allowed_fragments)
-
-      qp = QPalma(run,seqInfo,True)
-      allPredictions = qp.predict(self.prediction_set,param)
-
-      for current_prediction in allPredictions:
-         align_str = print_prediction(current_prediction)
-         print align_str
-
-         id          = current_prediction['id']
-         seq         = current_prediction['read']
-         dna         = current_prediction['dna']
-         chromo      = current_prediction['chr']
-         strand      = current_prediction['strand']
-         start_pos   = current_prediction['start_pos']
-         predExons   = current_prediction['predExons']
-
-         numExons = int(math.ceil(len(predExons) / 2))
-         
-         print alignment_reconstruct(current_prediction,numExons)
-         print id,start_pos,predExons
-
-      print 'Problem counter is %d' % qp.problem_ctr 
-
-
 if __name__ == '__main__':
    train_suite = unittest.TestLoader().loadTestsFromTestCase(TestQPalmaTraining)
    predict_suite = unittest.TestLoader().loadTestsFromTestCase(TestQPalmaPrediction)
 if __name__ == '__main__':
    train_suite = unittest.TestLoader().loadTestsFromTestCase(TestQPalmaTraining)
    predict_suite = unittest.TestLoader().loadTestsFromTestCase(TestQPalmaPrediction)
-   all_suites = unittest.TestSuite([train_suite, predict_suite])
+   #all_suites = unittest.TestSuite([train_suite, predict_suite])
+   all_suites = unittest.TestSuite([train_suite])
    unittest.TextTestRunner(verbosity=2).run(all_suites)
    unittest.TextTestRunner(verbosity=2).run(all_suites)