+ dataset size is smaller now (just storing indices).
author	fabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Mon, 25 Feb 2008 11:01:20 +0000 (11:01 +0000)
committer	fabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Mon, 25 Feb 2008 11:01:20 +0000 (11:01 +0000)
+ started refactoring mainloop

git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@7948 e1793c9e-67f9-0310-80fc-b846ff1f7b36

scripts/Experiment.py
scripts/ModelSelection.py
scripts/Run.py
scripts/compile_dataset.py
scripts/qpalma_main.py

index b98e6b4..29dba2d 100644 (file)
@@ -21,7 +21,7 @@ def createRuns():
    numFolds=5
 
    # the main directory where all results are stored
-   experiment_dir = '/fml/ag-raetsch/home/fabio/tmp/QPalma'
+   experiment_dir = '/fml/ag-raetsch/home/fabio/tmp/QPalma_test'
 
    assert os.path.exists(experiment_dir), 'toplevel dir for experiment does not exist!'
 
@@ -34,7 +34,9 @@ def createRuns():
 
    allRuns = []
 
-   dataset_filename = '/fml/ag-raetsch/home/fabio/svn/projects/QPalma/scripts/dataset_10k'
+   #dataset_filename = '/fml/ag-raetsch/home/fabio/svn/projects/QPalma/scripts/dataset_10k'
+   #dataset_filename = '/fml/ag-raetsch/home/fabio/svn/projects/QPalma/scripts/dataset_remapped'
+   dataset_filename = '/fml/ag-raetsch/home/fabio/svn/projects/QPalma/scripts/dataset_remapped_1k'
    #dataset_filename = '/fml/ag-raetsch/home/fabio/svn/projects/QPalma/scripts/new_dataset_100'
 
    for QFlag in [True,False]:
@@ -87,7 +89,7 @@ def createRuns():
 
             currentRun['name']                  = currentName
             currentRun['dataset_filename']      = dataset_filename
-            currentRun['experiment_path']       = '/fml/ag-raetsch/home/fabio/tmp/QPalma'
+            currentRun['experiment_path']       = experiment_dir
 
             currentRun['min_intron_len'] = 20
             currentRun['max_intron_len'] = 2000
index c0c84c3..03d0704 100644 (file)
@@ -33,7 +33,6 @@ class Model:
 
          self.allInstances.append(instance_fn)
 
-
    def doSelection(self):
       for instance in self.allInstances:
          time.sleep(3)
index 20f80f3..a9755bd 100644 (file)
@@ -17,7 +17,7 @@ class Run(dict):
       
       result = ""
       for key,val in self.iteritems():
-         result += "%s : %s\n" % (key,str(val))
+         result += "%s:\t\t %s\n" % (key,str(val))
 
       return result
 
index 8557fad..aa1ebfd 100644 (file)
@@ -4,6 +4,7 @@
 import sys
 import random
 import os
+import re
 import pdb
 import pydb
 import io_pickle
@@ -72,8 +73,8 @@ extended_alphabet = ['-','a','c','g','t','n','[',']']
 alphabet          = ['-','a','c','g','t','n']
 
 # some dummy elements to be returned if current example violates assertions
-nil_dna_frag = ('','','','')
-remapped_nil = ('','','')
+nil_dna_frag = ('','','','','')
+remapped_nil = ('')
 process_nil  = ('','','','','','')
 
 nil_splice_scores = ('','')
@@ -82,7 +83,6 @@ def compile_d(gff_file,dna_flat_files,filtered_reads,remapped_reads,tmp_dir,data
 
    assert os.path.exists(filtered_reads)
    assert os.path.exists(remapped_reads)
-
    assert not os.path.exists(dataset_file), 'The data_file already exists!'
 
    joinp = os.path.join
@@ -124,8 +124,6 @@ def compile_d(gff_file,dna_flat_files,filtered_reads,remapped_reads,tmp_dir,data
    RemappedReads = []
 
    AlternativeSequences = []
-   AlternativeAcceptors = []
-   AlternativeDonors    = []
 
    # Iterate over all remapped reads in order to generate for each read an
    # training / prediction example
@@ -155,7 +153,7 @@ def compile_d(gff_file,dna_flat_files,filtered_reads,remapped_reads,tmp_dir,data
 
          original_est = currentFRead['seq']
 
-         alternative_seq, alternative_acc, alternative_don = process_remapped_reads(reReads,currentFRead,dna_flat_files)
+         alternative_seq = process_remapped_reads(reReads,currentFRead,dna_flat_files)
 
          if seq == '':
             continue
@@ -172,8 +170,6 @@ def compile_d(gff_file,dna_flat_files,filtered_reads,remapped_reads,tmp_dir,data
          #FReads.append(currentFRead)
 
          AlternativeSequences.append(alternative_seq)
-         AlternativeAcceptors.append(alternative_acc)
-         AlternativeDonors.append(alternative_don)
 
          instance_counter += 1
 
@@ -187,9 +183,7 @@ def compile_d(gff_file,dna_flat_files,filtered_reads,remapped_reads,tmp_dir,data
 
    dataset = [Sequences, Acceptors, Donors,\
    Exons, Ests, OriginalEsts, Qualities,\
-   AlternativeSequences,\
-   AlternativeAcceptors,\
-   AlternativeDonors]
+   AlternativeSequences]
 
    #dataset = {'Sequences':Sequences, 'Acceptors':Acceptors, 'Donors':Donors,\
    #'Exons':Exons, 'Ests':Ests, 'OriginalEsts':OriginalEsts, 'Qualities':Qualities,\
@@ -209,9 +203,6 @@ def process_filtered_read(fRead,currentGene,dna_flat_files,test):
 
    """
 
-   currentReadSeq = fRead['seq']
-   currentReadSeq = currentReadSeq.lower()
-
    quality        = fRead['prb']
    #quality        = fRead['cal_prb']
    #quality        = fRead['chastity']
@@ -242,7 +233,7 @@ def process_filtered_read(fRead,currentGene,dna_flat_files,test):
          return process_nil
 
    # fetch the dna fragment that represents the part of the two exons
-   dnaFragment,currentExons,up_cut,down_cut = getDNAFragment(currentGene,genomicSeq,fRead)
+   dnaFragment,currentExons,up_cut,down_cut, currentReadSeq = getDNAFragment(currentGene,genomicSeq,fRead)
 
    if dnaFragment == '':
       return process_nil
@@ -454,7 +445,7 @@ def getDNAFragment(currentGene,currentDNASeq,fRead):
    #print 'leaving getDNAFragment...'
 
    # end of exons/read borders calculation
-   return currentDNASeq,currentExons,up_cut,down_cut
+   return currentDNASeq, currentExons, up_cut, down_cut, currentReadSeq
 
 
 def getSpliceScores(chr,strand,intervalBegin,intervalEnd,currentDNASeq,seq_pos_offset):
@@ -566,11 +557,12 @@ def process_remapped_reads(reReads,fRead,dna_flat_files):
    strand = fRead['strand']
    pos = fRead['p_start']
    chr = fRead['chr']
-   chrom         = 'chr%d' % chr
 
-   allDNASeq   = []
-   allAcc      = []
-   allDon      = []
+   #allDNASeq   = []
+   #allAcc      = []
+   #allDon      = []
+   #allLabels   = []
+   alternativeSeq = []
 
    for currentRRead in reReads:
       rId            = currentRRead['id']
@@ -590,36 +582,57 @@ def process_remapped_reads(reReads,fRead,dna_flat_files):
    
       # vmatch found a correct position
       if pos + s1_pos == pos1:
-         genomicSeq_start  = pos
-         genomicSeq_stop   = fRead['p_stop']
+         genomicSeq_start  = pos1
+         genomicSeq_stop   = pos1 + fragment_size
+         currentLabel = True
       else:
          genomicSeq_start  = pos1
          genomicSeq_stop   = pos1 + fragment_size
+         currentLabel = False
          
       #return remapped_nil
 
-      genomicSeq = load_genomic(chrom,strand,genomicSeq_start-1,genomicSeq_stop,dna_flat_files,one_based=False)
-      genomicSeq = genomicSeq.lower()
+      #currentDNASeq, currentAcc, currentDon =\
+      #get_seq_and_scores(chr,strand,genomicSeq_start,genomicSeq_stop,dna_flat_files)
 
-      # check the obtained dna sequence
-      assert genomicSeq != '', 'load_genomic returned empty sequence!'
-      for elem in genomicSeq:
-         assert elem in alphabet, pdb.set_trace()
+      #allDNASeq.append(currentDNASeq)
+      #allAcc.append(currentAcc)
+      #allDon.append(currentDon)
+      #allLabels.append(currentLabel)
+      alternativeSeq.append( (chr,strand,genomicSeq_start,genomicSeq_stop) )
 
-      intervalBegin  = genomicSeq_start-100
-      intervalEnd    = genomicSeq_stop+100
-      currentDNASeq  = genomicSeq
-      seq_pos_offset = genomicSeq_start
+      #currentDNASeq, currentAcc, currentDon = get_seq_and_scores(chr,strand,genomicSeq_start,genomicSeq_stop,dna_flat_files)
 
-      #pydb.debugger()
+   #return allDNASeq, allAcc, allDon, allLabels
+   return alternativeSeq
+
+
+def get_seq_and_scores(chr,strand,genomicSeq_start,genomicSeq_stop,dna_flat_files):
+   """
+   This function expects an interval, chromosome and strand information and
+   returns then the genomic sequence of this interval and the associated scores.
+   """
+
+   chrom         = 'chr%d' % chr
+   genomicSeq = load_genomic(chrom,strand,genomicSeq_start-1,genomicSeq_stop,dna_flat_files,one_based=False)
+   genomicSeq = genomicSeq.lower()
+
+   # check the obtained dna sequence
+   assert genomicSeq != '', 'load_genomic returned empty sequence!'
+   #for elem in genomicSeq:
+   #   if not elem in alphabet:
+   
+   no_base = re.compile('[^acgt]')
+   genomicSeq = no_base.sub('n',genomicSeq)
 
-      currentAcc, currentDon = getSpliceScores(chr,strand,intervalBegin,intervalEnd,currentDNASeq,seq_pos_offset)
+   intervalBegin  = genomicSeq_start-100
+   intervalEnd    = genomicSeq_stop+100
+   currentDNASeq  = genomicSeq
+   seq_pos_offset = genomicSeq_start
 
-      allDNASeq.append(currentDNASeq)
-      allAcc.append(currentAcc)
-      allDon.append(currentDon)
+   currentAcc, currentDon = getSpliceScores(chr,strand,intervalBegin,intervalEnd,currentDNASeq,seq_pos_offset)
 
-   return allDNASeq, allAcc, allDon
+   return currentDNASeq, currentAcc, currentDon
 
 def reverse_complement(seq):
    map = {'a':'t','c':'g','g':'c','t':'a'}
index 99da8a0..802cb64 100644 (file)
@@ -8,7 +8,7 @@
 # quality scores.
 # 
 # This file represents the conversion of the main matlab 
-# training loop for Palma to python.
+# training loop for Palma to Python.
 # 
 # Author: Fabio De Bona
 # 
@@ -29,21 +29,18 @@ import QPalmaDP
 import qpalma
 from qpalma.SIQP_CPX import SIQPSolver
 from qpalma.DataProc import *
-from qpalma.generateEvaluationData import *
 from qpalma.computeSpliceWeights import *
 from qpalma.set_param_palma import *
 from qpalma.computeSpliceAlignWithQuality import *
 from qpalma.penalty_lookup_new import *
 from qpalma.compute_donacc import *
-from qpalma.export_param import *
+#from qpalma.export_param import *
 from qpalma.TrainingParam import Param
 from qpalma.Plif import Plf
 
 from qpalma.tools.splicesites import getDonAccScores
 from qpalma.Configuration import *
 
-from compile_dataset import compile_d
-
 import palma.palma_utils as pu
 from palma.output_formating import print_results
 
@@ -57,13 +54,8 @@ class QPalma:
    
    def __init__(self,run):
       self.ARGS = Param()
-
       self.run = run
 
-      #data_filename = self.run['dataset_filename']
-      #Sequences, Acceptors, Donors, Exons, Ests, OriginalEsts, Qualities, SplitPos = paths_load_data(data_filename,'training',None,self.ARGS)
-
-      # Load the whole dataset 
       if self.run['mode'] == 'normal':
          self.use_quality_scores = False
 
@@ -72,17 +64,15 @@ class QPalma:
       else:
          assert(False)
 
-      #self.Sequences   = Sequences
-      #self.Exons       = Exons
-      #self.Ests        = Ests
-      #self.OriginalEsts= OriginalEsts
-      #self.Qualities   = Qualities
-      #self.SplitPos    = SplitPos
-      #self.Donors      = Donors
-      #self.Acceptors   = Acceptors
+      full_working_path = os.path.join(run['experiment_path'],run['name'])
+      #assert not os.path.exists(full_working_path)
+      #os.mkdir(full_working_path)
+      assert os.path.exists(full_working_path)
 
-      #calc_info(self.Acceptors,self.Donors,self.Exons,self.Qualities)
-      #print 'leaving constructor...'
+      # ATTENTION: Changing working directory
+      os.chdir(full_working_path)
+
+      cPickle.dump(run,open('run_object.pickle','w+'))
 
    def plog(self,string):
       self.logfh.write(string)
@@ -91,21 +81,15 @@ class QPalma:
    def train(self):
       run = self.run
 
-      full_working_path = os.path.join(run['experiment_path'],run['name'])
-      assert not os.path.exists(full_working_path)
-      os.mkdir(full_working_path)
-      assert os.path.exists(full_working_path)
-
-      # ATTENTION: Changing working directory
-      os.chdir(full_working_path)
-
       self.logfh = open('_qpalma_train.log','w+')
 
       self.plog("Settings are:\n")
       self.plog("%s\n"%str(run))
 
       data_filename = self.run['dataset_filename']
-      Sequences, Acceptors, Donors, Exons, Ests, OriginalEsts, Qualities, SplitPos = paths_load_data(data_filename,'training',None,self.ARGS)
+      Sequences, Acceptors, Donors, Exons, Ests, OriginalEsts, Qualities,\
+      AlternativeSequences =\
+      paths_load_data(data_filename,'training',None,self.ARGS)
 
       # Load the whole dataset 
       if self.run['mode'] == 'normal':
@@ -121,7 +105,6 @@ class QPalma:
       self.Ests        = Ests
       self.OriginalEsts= OriginalEsts
       self.Qualities   = Qualities
-      self.SplitPos    = SplitPos
       self.Donors      = Donors
       self.Acceptors   = Acceptors
 
@@ -173,6 +156,7 @@ class QPalma:
       # Initialize solver 
       self.plog('Initializing problem...\n')
       solver = SIQPSolver(run['numFeatures'],numExamples,run['C'],self.logfh,run)
+
       #solver.enforceMonotonicity(lengthSP,lengthSP+donSP)
       #solver.enforceMonotonicity(lengthSP+donSP,lengthSP+donSP+accSP)
 
@@ -558,29 +542,34 @@ class QPalma:
 ###############################################################################
 
 
-   def evaluate(self,param_filename,dataset):
+   def evaluate(self,param_filename):
       run = self.run
+      beg = run['prediction_begin']
+      end = run['prediction_end']
 
-      data = dataset
-      self.Sequences   = data['Sequences']
-      self.Acceptors   = data['Acceptors']
-      self.Donors      = data['Donors']
-      self.Exons       = data['Exons']
-      self.Ests        = data['Ests']
-      self.OriginalEsts= data['OriginalEsts']
-      self.Qualities   = data['Qualities']
-      self.SplitPositions = data['SplitPositions']
+      data_filename = self.run['dataset_filename']
+      Sequences, Acceptors, Donors, Exons, Ests, OriginalEsts, Qualities,\
+      AlternativeSequences=\
+      paths_load_data(data_filename,'training',None,self.ARGS)
+
+      self.Sequences   = Sequences
+      self.Exons       = Exons
+      self.Ests        = Ests
+      self.OriginalEsts= OriginalEsts
+      self.Qualities   = Qualities
+      self.Donors      = Donors
+      self.Acceptors   = Acceptors
 
-      self.FReads = data['FReads']
+      self.AlternativeSequences = AlternativeSequences
 
-      beg = run['prediction_begin']
-      end = run['prediction_end']
+      #calc_info(self.Acceptors,self.Donors,self.Exons,self.Qualities)
+      #print 'leaving constructor...'
 
       self.logfh = open('_qpalma_predict.log','w+')
 
       # predict on training set
       print '##### Prediction on the training set #####'
-      #self.predict(param_filename,0,beg)
+      self.predict(param_filename,0,beg)
       
       # predict on test set
       print '##### Prediction on the test set #####'
@@ -590,6 +579,11 @@ class QPalma:
       #pdb.set_trace()
 
    def predict(self,param_filename,beg,end):
+      """
+      Performing a prediction takes...
+
+      """
+
       run = self.run
 
       if self.run['mode'] == 'normal':
@@ -607,9 +601,10 @@ class QPalma:
       Qualities   = self.Qualities[beg:end]
       Acceptors   = self.Acceptors[beg:end]
       Donors      = self.Donors[beg:end]
-      SplitPos    = self.SplitPositions[beg:end]
+      #SplitPos    = self.SplitPositions[beg:end]
+      #FReads   = self.FReads[beg:end]
 
-      FReads   = self.FReads[beg:end]
+      AlternativeSequences = self.AlternativeSequences[beg:end]
 
       print 'STARTING PREDICTION'
 
@@ -630,7 +625,6 @@ class QPalma:
       param = cPickle.load(open(param_filename))
 
       # Set the parameters such as limits penalties for the Plifs
-      #pdb.set_trace()
       [h,d,a,mmatrix,qualityPlifs] =\
       set_param_palma(param,self.ARGS.train_with_intronlengthinformation,run)
 
@@ -649,6 +643,7 @@ class QPalma:
       currentPhi = zeros((self.run['numFeatures'],1))
       totalQualityPenalties = zeros((totalQualSP,1))
 
+      # some datastructures storing the predictions
       exon1Begin = []
       exon1End = []
       exon2Begin = []
@@ -660,13 +655,12 @@ class QPalma:
 
       allPredictions = []
 
+      # beginning of the prediction loop
+      #
       for exampleIdx in range(numExamples):
          dna = Sequences[exampleIdx] 
          est = Ests[exampleIdx] 
 
-         fRead = FReads[exampleIdx]
-         est = fRead['seq']
-
          new_est = ''
          e = 0
          while True:
@@ -680,23 +674,19 @@ class QPalma:
                new_est += est[e]
                e += 1
 
-         #pdb.set_trace()
          est = new_est
-         #pdb.set_trace()
 
          print exampleIdx
 
          est = "".join(est)
          est = est.lower()
          est = est.replace('-','')
-         #original_est = OriginalEsts[exampleIdx] 
-         #original_est = "".join(original_est)
-         #original_est = original_est.lower()
 
-         original_est = est
-
-         currentSplitPos = SplitPos[exampleIdx]
+         original_est = OriginalEsts[exampleIdx] 
+         original_est = "".join(original_est)
+         original_est = original_est.lower()
 
+         #currentSplitPos = SplitPos[exampleIdx]
 
          #import re
          #num_hits = len(re.findall('\[',original_est[:currentSplitPos]))
@@ -720,7 +710,7 @@ class QPalma:
          if not run['enable_quality_scores']:
             quality = [40]*len(est)
 
-         quality = fRead['prb']
+         #quality = fRead['prb']
 
          exons = zeros((2,2))
          
@@ -747,7 +737,7 @@ class QPalma:
          currentPhi[donSP+accSP:donSP+accSP+lengthSP]                      = mat(h.penalties[:]).reshape(lengthSP,1)
          currentPhi[donSP+accSP+lengthSP:donSP+accSP+lengthSP+mmatrixSP]   = mmatrix[:]
 
-         ################## Calculate wrong alignment(s) ######################
+         ################## Calculate alignment(s) ######################
 
          # Compute donor, acceptor with penalty_lookup_new
          # returns two double lists
@@ -849,7 +839,7 @@ class QPalma:
          self.plog(line2+'\n')
          self.plog(line3+'\n')
 
-         e1_b_off,e1_e_off,e2_b_off,e2_e_off,newExons = self.evaluateExample(dna,est,exons,newSpliceAlign,newEstAlign,currentSplitPos,exampleIdx)
+         e1_b_off,e1_e_off,e2_b_off,e2_e_off,newExons = self.evaluateExample(dna,est,exons,newSpliceAlign,newEstAlign,exampleIdx)
 
          one_prediction = {'e1_b_off':e1_b_off,'e1_e_off':e1_e_off,
          'e2_b_off':e2_b_off ,'e2_e_off':e2_e_off,\
@@ -936,55 +926,8 @@ class QPalma:
       print 'Incorrect positions:\t\t%d\t%d\t%d\t%d' % (len(e1Begin_neg),len(e1End_neg),len(e2Begin_neg),len(e2End_neg))
       print 'Mean of pos. offset:\t\t%.2f\t%.2f\t%.2f\t%.2f' % (mean_e1Begin_neg,mean_e1End_neg,mean_e2Begin_neg,mean_e2End_neg)
    
-      #print 'total correct'
-      #print correct_rest2error
-
-      #import pylab
-      #
-      #y_values = []
-      #for overlap in range(36):
-      #   result = 0
-      #   try:
-      #      cv = off_rest2error[overlap]
-      #   except:
-      #      cv = 0
-
-      #   result += cv
-
-      #   try:
-      #      cv = wrong_rest2error[overlap]
-      #   except:
-      #      cv = 0
-
-      #   result += cv
-
-      #   y_values.append(result/(1.0*numExamples))
-
-      #pylab.bar(range(36),y_values,width=0.2,align='center')
-      #pylab.xticks( numpy.arange(0,36,1) )
-      #pylab.savefig('%s_error_vs_overlap.eps'%run['name'])
-      #pylab.clf()
-      #raw_input()
       cPickle.dump(allPredictions,open('%s_allPredictions'%run['name'],'w+'))
 
-      #print 'some positions off'
-      #print off_rest2error
-      #import pylab
-      #pylab.bar(off_rest2error.keys(),off_rest2error.values(),width=0.2,align='center')
-      ##pylab.show()
-      #pylab.xticks( numpy.arange(0,max(off_rest2error.keys())+1,1) )
-      #pylab.savefig('%s_offset.eps'%run['name'])
-      #pylab.clf()
-      ##raw_input()
-
-      #print 'total wrong'
-      #print wrong_rest2error
-      #import pylab
-      #pylab.bar(wrong_rest2error.keys(),wrong_rest2error.values(),width=0.2,align='center')
-      #pylab.xticks( numpy.arange(0,max(wrong_rest2error.keys())+1,1) )
-      #pylab.savefig('%s_wrong.eps'%run['name'])
-      #pylab.clf()
-
       print 'Prediction completed'
 
    def evaluatePositions(self,eBegin,eEnd):
@@ -1014,7 +957,7 @@ class QPalma:
 
       return eBegin_pos,eBegin_neg,eEnd_pos,eEnd_neg,mean_eBegin_neg,mean_eEnd_neg
 
-   def evaluateExample(self,dna,est,exons,SpliceAlign,newEstAlign,spos,exampleIdx):
+   def evaluateExample(self,dna,est,exons,SpliceAlign,newEstAlign,exampleIdx):
       newExons = []
       oldElem = -1
       SpliceAlign = SpliceAlign.flatten().tolist()[0]
@@ -1051,28 +994,6 @@ class QPalma:
 
       return e1_b_off,e1_e_off,e2_b_off,e2_e_off,newExons
 
-def fetch_genome_info(ginfo_filename):
-   if not os.path.exists(ginfo_filename):
-      cmd = ['']*4
-      cmd[0] = 'addpath /fml/ag-raetsch/home/fabio/svn/tools/utils'
-      cmd[1] = 'addpath /fml/ag-raetsch/home/fabio/svn/tools/genomes'
-      cmd[2] = 'genome_info = init_genome(\'%s\')' % gen_file
-      cmd[3] = 'save genome_info.mat genome_info'  
-      full_cmd = "matlab -nojvm -nodisplay -r \"%s; %s; %s; %s; exit\"" % (cmd[0],cmd[1],cmd[2],cmd[3])
-
-      obj = subprocess.Popen(full_cmd,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
-      out,err = obj.communicate()
-      assert err == '', 'An error occured!\n%s'%err
-
-      ginfo = scipy.io.loadmat('genome_info.mat')
-      cPickle.dump(self.genome_info,open(ginfo_filename,'w+'))
-      return ginfo['genome_info']
-
-   else:
-      return cPickle.load(open(ginfo_filename))
-
-
-
 def calcStat(Acceptor,Donor,Exons):
    maxAcc = -100
    minAcc = 100
@@ -1108,8 +1029,6 @@ def calcStat(Acceptor,Donor,Exons):
    don_pos = [elem for elem in don_vec_pos if elem != -inf]
    don_neg = [elem for elem in don_vec_neg if elem != -inf]
 
-   #pdb.set_trace()
-
    for idx in range(len(Sequences)):
       acc = [elem for elem in Acceptors[idx] if elem != -inf]
       maxAcc = max(max(acc),maxAcc)
@@ -1206,15 +1125,19 @@ def calc_info(acc,don,exons,qualities):
 ###########################
 
 if __name__ == '__main__':
-   qpalma = QPalma()
-
    mode = sys.argv[1]
+   run_obj_fn = sys.argv[2]
+
+   run_obj = cPickle.load(open(run_obj_fn))
+
+   qpalma = QPalma(run_obj)
+
 
-   if len(sys.argv) == 2 and mode == 'train':
+   if len(sys.argv) == 3 and mode == 'train':
       qpalma.train()
 
-   elif len(sys.argv) == 3 and mode == 'predict':
-      param_filename = sys.argv[2]
+   elif len(sys.argv) == 4 and mode == 'predict':
+      param_filename = sys.argv[3]
       assert os.path.exists(param_filename)
       qpalma.evaluate(param_filename)
    else: