+ fixed automatic Python path settings in Makefiles
author    Fabio <fabio@congo.fml.local>
Wed, 22 Oct 2008 09:39:32 +0000 (11:39 +0200)
committer Fabio <fabio@congo.fml.local>
Wed, 22 Oct 2008 09:39:32 +0000 (11:39 +0200)
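
Replace the hardcoded /usr/include/python2.5 and -lpython2.5 settings in the
DynProg and ParaParser Makefiles with the output of `python-config --includes`
and `python-config --libs`, and pass the training/prediction input filenames on
the qpalma_pipeline.py command line instead of submitting a preprocessing task.

Example invocations of the new command-line interface (the filenames below are
only placeholders, not files shipped with the repository):

    python qpalma_pipeline.py train example.conf training_data.pickle
    python qpalma_pipeline.py predict example.conf param.pickle unspliced_reads.fa spliced_reads.fa
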
DynProg/Makefile
DynProg/qpalma_dp.cpp
ParaParser/Makefile
qpalma/DatasetUtils.py
qpalma/qpalma_main.py
scripts/qpalma_pipeline.py
tests/test_qpalma.py

diff --git a/DynProg/Makefile b/DynProg/Makefile
index 9232ee2..d025098 100644
@@ -18,21 +18,19 @@ HDRS= Mathmatics.h\
 
 OBJS = $(SRCS:%.cpp=%.o)
 
-#CXXFLAGS=-O3 -fPIC -g -ggdb 
-CXXFLAGS=-Wall -std=c++98 -ggdb -O3 -fPIC -I/usr/include/python2.5
-LDFLAGS=-lprofiler -L/fml/ag-raetsch/home/fabio/own_libs/libunwind/lib -lunwind-x86_64 -lunwind
+PY_INCL=`python-config --includes`
+PY_LIBS=`python-config --libs`
 
+CXXFLAGS=-Wall -std=c++98 -ggdb -O3 -fPIC $(PY_INCL)
 
 IF=QPalmaDP
 
 all: $(OBJS) $(HDRS)
+       @ echo "Python build flags:" $(PY_INCL) $(PY_LIBS)
        @ swig -c++ -python ${IF}.i
-       @ g++ $(CXXFLAGS) -I/usr/include/python2.5 -c ${IF}_wrap.cxx -o ${IF}_wrap.o
-       @ g++ $(CXXFLAGS) -shared -lpython2.5 $(OBJS) ${IF}_wrap.o -o _${IF}.so
+       @ g++ $(CXXFLAGS) -c ${IF}_wrap.cxx -o ${IF}_wrap.o
+       @ g++ $(CXXFLAGS) -shared $(PY_LIBS) $(OBJS) ${IF}_wrap.o -o _${IF}.so
        @ python -c "import ${IF}"
 
-test: $(OBJS) $(HDRS) 
-       g++ $(CXXFLAGS) $(LDFLAGS) -o test_fm  debug_tools.o Mathmatics.o io.o penalty_info.o fill_matrix.o test_fill_matrix.cpp 
-
 clean:
        @ rm *.o *.so *.cxx ${IF}.py ${IF}.pyc
diff --git a/DynProg/qpalma_dp.cpp b/DynProg/qpalma_dp.cpp
index f55c1e2..1039458 100644
@@ -330,7 +330,6 @@ PyObject* Alignment::getAlignmentResultsNew() {
       penalty_struct currentPlif;
       int ctr=0;
       for (size_t z=0; z<nr_paths; z++) {
-      //for (int z=0; z<nr_paths; z++) {
          for(int estChar=1;estChar<6;estChar++) {
             for(int dnaChar=0;dnaChar<6;dnaChar++) {
 
@@ -352,12 +351,6 @@ PyObject* Alignment::getAlignmentResultsNew() {
    py_splice_align, py_est_align, py_mmatrix,
    py_align_scores, py_q_scores);
 
-   //PyTuple_SetItem(result,0,py_splice_align);
-   //PyTuple_SetItem(result,1,py_est_align);
-   //PyTuple_SetItem(result,2,py_mmatrix);
-   //PyTuple_SetItem(result,3,py_align_scores);
-   //PyTuple_SetItem(result,4,py_q_scores);
-
    return result;
 }
 
diff --git a/ParaParser/Makefile b/ParaParser/Makefile
index 0d06b78..3c20980 100644
@@ -1,7 +1,9 @@
 PROJ=ParaParser
 
-#CXXFLAGS=-Wall  -Wshadow -std=c++98 -O3 -ggdb -fPIC -I/usr/include/python2.5
-CXXFLAGS=-Wall  -Wshadow -std=c++98 -O3 -fPIC -I/usr/include/python2.5
+PY_INCL=`python-config --includes`
+PY_LIBS=`python-config --libs`
+
+CXXFLAGS=-Wall -Wshadow -std=c++98 -O3 -fPIC $(PY_INCL)
 
 SRCS= ParaParser.cpp
 
@@ -10,10 +12,9 @@ OBJS = $(SRCS:%.cpp=%.o)
 all: $(OBJS)
        @ echo "Building ParaParser module"
        @ swig -c++ -python ${PROJ}.i
-       @ g++ $(CXXFLAGS) -I/usr/include/python2.5 -c ${PROJ}_wrap.cxx -o ${PROJ}_wrap.o
-       @ g++ $(CXXFLAGS) -shared -lpython2.5 $(OBJS) ${PROJ}_wrap.o -o _${PROJ}.so
+       @ g++ $(CXXFLAGS) -c ${PROJ}_wrap.cxx -o ${PROJ}_wrap.o
+       @ g++ $(CXXFLAGS) -shared $(PY_LIBS) $(OBJS) ${PROJ}_wrap.o -o _${PROJ}.so
        @ python -c "import ${PROJ}" && echo "Creation of ParaParser module was successful!"
 
 clean:
        rm *.o *.so *.cxx ${PROJ}.py ${PROJ}.pyc
-
diff --git a/qpalma/DatasetUtils.py b/qpalma/DatasetUtils.py
index 6c41062..4116e12 100644
@@ -79,7 +79,7 @@ def generateTrainingDataset(settings):
    accessWrapper = DataAccessWrapper(settings)
    seqInfo = SeqSpliceInfo(accessWrapper,settings['allowed_fragments'])
 
-   for line in open(map_file):
+   for line in open(settings['training_data_fn']):
       line = line.strip()
       if line.startswith('#') or line == '':
          continue
diff --git a/qpalma/qpalma_main.py b/qpalma/qpalma_main.py
index afc32d1..b856505 100644
@@ -263,6 +263,7 @@ class QPalma:
 
             dna_calc = dna_calc.replace('-','')
 
+
             #print 'right before computeSpliceWeights exampleIdx %d' % exampleIdx
             # Calculate the weights
             trueWeightDon, trueWeightAcc, trueWeightIntron =\
@@ -297,9 +298,9 @@ class QPalma:
 
             ps = h.convert2SWIG()
 
-            newSpliceAlign, newEstAlign, newWeightMatch, newDPScores,\
-            newQualityPlifsFeatures, unneeded1, unneeded2 =\
+            newSpliceAlign, newEstAlign, newWeightMatch, newDPScores, newQualityPlifsFeatures, unused1, unused2 =\
             performAlignment(dna,read,quality,mmatrix,donor,acceptor,ps,qualityPlifs,num_path[exampleIdx],False,settings)
+
             mm_len = settings['matchmatrixRows']*settings['matchmatrixCols']
 
             # check for correct reshaping !!
@@ -375,11 +376,7 @@ class QPalma:
 
                if False:
                   self.plog("Is considered as: %d\n" % true_map[1])
-
-                  #result_len = currentAlignment.getResultLength()
-
                   dna_array,est_array = currentAlignment.getAlignmentArraysNew()
-
                   _newSpliceAlign = newSpliceAlign[0].flatten().tolist()[0]
                   _newEstAlign = newEstAlign[0].flatten().tolist()[0]
 
@@ -397,6 +394,7 @@ class QPalma:
 
             # call solver every nth example / added constraint
             #if exampleIdx != 0 and exampleIdx % numConstPerRound == 0:
+            if True: # solver is currently called after every example (see commented-out condition above)
                objValue,w,self.slacks = solver.solve()
                solver_call_ctr += 1
 
diff --git a/scripts/qpalma_pipeline.py b/scripts/qpalma_pipeline.py
index b53c7f4..8c9e501 100644
@@ -28,7 +28,13 @@ from qpalma.SettingsParser import parseSettings
 from qpalma.utils import logwrite
 
 
-Errormsg = """Usage is: python qpalma_pipeline.py predict|train <config filename>"""
+Errormsg = """Usage is:  
+                  
+                  python qpalma_pipeline.py train <config filename> <training data filename>
+              or
+                  python qpalma_pipeline.py predict <config filename> <parameter filename> <putative unspliced reads filename> <putative spliced reads filename>
+                  
+           """
 
 
 class System:
@@ -53,7 +59,7 @@ class System:
       logwrite('Parsed settings system set up.',self.settings)
 
 
-   def training(self):
+   def training(self, training_data_fn):
       """
       This function is responsible for the whole training process. It first
       converts the data to the right format needed by QPalma for the training
@@ -65,12 +71,7 @@ class System:
       print '\t\t\tStarting approximation...\n'
       print '#'*80
 
-      # When we are given only genomic reads we first generate artificially spliced
-      # ones in order to generate a training set
-      pre_task = TrainingPreprocessingTask(self.settings)
-      pre_task.createJobs()
-      pre_task.submit() 
-      pre_task.checkIfTaskFinished()
+      self.settings['training_data_fn'] = training_data_fn
 
       # Collect the data and create a pickled training set
       generateTrainingDataset(self.settings)
@@ -84,7 +85,7 @@ class System:
       logwrite('End of training.\n',self.settings)
 
 
-   def prediction(self):
+   def prediction(self, param_fn, unspliced_reads_fn, spliced_reads_fn):
       """
       This function encapsulates all steps needed to perform a prediction. Given
       the parameter of the training and paths to a prediction set it will
@@ -97,6 +98,10 @@ class System:
       print '\t\t\tStarting approximation...\n'
       print '#'*80
 
+      self.settings['prediction_param_fn'] = param_fn
+      self.settings['unspliced_reads_fn']  = unspliced_reads_fn
+      self.settings['spliced_reads_fn']    = spliced_reads_fn
+
       # Before creating a candidate spliced read dataset we have to first filter
       # the matches from the first seed finding run.
 
@@ -139,21 +144,30 @@ class System:
    
 
 if __name__ == '__main__':
+   if len(sys.argv) not in (4, 6): # 4 arguments for 'train', 6 for 'predict'
+      print Errormsg
+      sys.exit(1)
+
    mode     = sys.argv[1]
-   assert mode in ['predict','train'], Errormsg
+   if not mode in ['predict','train']:
+      print Errormsg
+      sys.exit(1)
+
    filename = sys.argv[2]
-   assert os.path.exists(filename), Errormsg
+   if not os.path.exists(filename):
+      print Errormsg
+      sys.exit(1)
 
    # creating system object
    system_obj = System(filename)
 
    if mode == 'predict':
-      system_obj.prediction()
+      param_fn           = sys.argv[3]
+      unspliced_reads_fn = sys.argv[4]
+      spliced_reads_fn   = sys.argv[5]
+      system_obj.prediction(param_fn, unspliced_reads_fn, spliced_reads_fn)
    elif mode == 'train':
-      system_obj.training()
+      training_data_fn   = sys.argv[3]
+      system_obj.training(training_data_fn)
    else:
       assert False
-
-   #spliced_reads_fn 
-   #unspliced_reads_fn 
-   #prediction_param_fn
diff --git a/tests/test_qpalma.py b/tests/test_qpalma.py
index 2369f19..83a959e 100644
@@ -83,7 +83,7 @@ def createTrainingSet():
 
    exons      = numpy.mat([1660,1687,1774,1801]).reshape((2,2))
 
-   #dataset[id] = (currentSeqInfo,originalRead,currentQualities,exons)
+   dataset[id] = (currentSeqInfo,originalRead,currentQualities,exons)
 
    return dataset
 
@@ -242,7 +242,8 @@ class TestQPalmaPrediction(unittest.TestCase):
          numExons = int(math.ceil(len(predExons) / 2))
          
          print alignment_reconstruct(current_prediction,numExons)
-         print id,start_pos,predExons
+         print id
+         print [e + start_pos for e in predExons]
 
       ## chr1 +  20-120
       #read  = 'catctatgcaacagcattacagtgatcaccggcccaaaaaacctgtgtctggggttttgcctgatgatagcagtgatactgaaactggatcaatggtaag'
@@ -322,68 +323,9 @@ class TestQPalmaPrediction(unittest.TestCase):
       print 'Problem counter is %d' % qp.problem_ctr 
 
 
-
-
-   def _testAlignments(self):
-      run_dir = '/fml/ag-raetsch/home/fabio/tmp/newest_run/alignment/saved_run'
-
-      run   = cPickle.load(open(jp(run_dir,'run_obj.pickle')))
-      run['name'] = 'test_run'
-      run['result_dir']    = '/fml/ag-raetsch/home/fabio/tmp/sandbox/testcases'
-
-      param_fn = jp(run_dir,'param_526.pickle')
-      param = cPickle.load(open(param_fn))
-
-      print self.prediction_set
-      for example_key in self.prediction_set.keys():
-         print 'Current example %d' % example_key
-
-         for example in self.prediction_set[example_key]:
-            print example
-            print 'size'
-            print len(example)
-
-      # fetch the data needed
-      settings = {}
-
-      settings['genome_dir']           = '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome'
-      settings['acceptor_scores_loc']  = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/acc'
-      settings['donor_scores_loc']     = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/don'
-
-      settings['genome_file_fmt']      = 'chr%d.dna.flat'
-      settings['splice_score_file_fmt']= 'contig_%d%s'
-
-      allowed_fragments = [1]
-      settings['allowed_fragments'] = allowed_fragments
-
-      accessWrapper = DataAccessWrapper(settings)
-      seqInfo = SeqSpliceInfo(accessWrapper,allowed_fragments)
-
-      qp = QPalma(run,seqInfo,True)
-      allPredictions = qp.predict(self.prediction_set,param)
-
-      for current_prediction in allPredictions:
-         align_str = print_prediction(current_prediction)
-         print align_str
-
-         id          = current_prediction['id']
-         seq         = current_prediction['read']
-         dna         = current_prediction['dna']
-         chromo      = current_prediction['chr']
-         strand      = current_prediction['strand']
-         start_pos   = current_prediction['start_pos']
-         predExons   = current_prediction['predExons']
-
-         numExons = int(math.ceil(len(predExons) / 2))
-         
-         print alignment_reconstruct(current_prediction,numExons)
-         print id,start_pos,predExons
-
-      print 'Problem counter is %d' % qp.problem_ctr 
-
-
 if __name__ == '__main__':
    train_suite = unittest.TestLoader().loadTestsFromTestCase(TestQPalmaTraining)
    predict_suite = unittest.TestLoader().loadTestsFromTestCase(TestQPalmaPrediction)
-   all_suites = unittest.TestSuite([train_suite, predict_suite])
+   #all_suites = unittest.TestSuite([train_suite, predict_suite])
+   all_suites = unittest.TestSuite([train_suite])
    unittest.TextTestRunner(verbosity=2).run(all_suites)