};
#endif // _QPALMA_DP_H_
-
[ 0.69422476], [ 0.4310939 ], [ 0.03069099], [ 0.35969779], [ 0.18047331],
[ 0.4177651 ], [ 0.01360547], [ 0.29069319]])
+#fixedParamQ = numpy.matlib.mat(range(1300))
+#fixedParamQ = fixedParamQ.reshape((1300,1))
###########################################################
#
#
#
-C = 10000
+C = 1
+###############################################################################
+#
+# CHOOSING THE MODE
+#
# 'normal' means work like Palma 'using_quality_scores' means work like Palma
# plus using sequencing quality scores
+#
+###############################################################################
#mode = 'normal'
-
mode = 'using_quality_scores'
+
+
+###############################################################################
+#
# When using quality scores our scoring function is defined as
#
# f: S_e x R x S -> R
#
# At ests we do not have gaps with quality scores so we look up the matchmatrix
# instead.
+###############################################################################
numDonSuppPoints = 30
numAccSuppPoints = 30
numFeatures = numDonSuppPoints + numAccSuppPoints\
+ numLengthSuppPoints + sizeMatchmatrix[0]*sizeMatchmatrix[1] + totalQualSuppPoints
+
+###############################################################################
+#
+# GENERAL SETTINGS CONCERNING THE SOLVER
+#
+#
+#
+###############################################################################
+
iter_steps = 40
remove_duplicate_scores = False
print_matrix = False
else:
assert False, 'Wrong operation mode specified'
+###############################################################################
+#
+# DATA SETTINGS CONCERNING THE SPLITS AND FILE LOCATIONS
#
#
+#
+###############################################################################
+
training_begin = 0
-training_end = 1000
-prediction_begin = 0
-prediction_end = 1000
+training_end = 1500
+prediction_begin = 1500
+prediction_end = 2200
-#
-#
-#
dna_filename = '/fml/ag-raetsch/share/projects/qpalma/solexa/allGenes.pickle'
est_filename = '/fml/ag-raetsch/share/projects/qpalma/solexa/filteredReads_recent'
tair7_seq_filename = '/fml/ag-raetsch/share/projects/qpalma/solexa/tair7_seq.pickle'
-# Check for valid parameters
+###############################################################################
+#
+# SANITY CHECKS
+#
+###############################################################################
assert numQualPlifs >= 0
assert numDonSuppPoints > 1
assert numAccSuppPoints > 1
assert os.path.exists(dna_filename), 'DNA data does not exist!'
assert os.path.exists(est_filename), 'EST/Reads data does not exist!'
assert os.path.exists(tair7_seq_filename), 'Sequence data does not exist!'
+
from PyGff import *
import io_pickle
import scipy.io
-import pdb
+import sys
+
+from genome_utils import load_genomic
def paths_load_data_solexa(expt,genome_info,PAR,test=False):
# expt can be 'training','validation' or 'test'
dna_filename = Conf.dna_filename
est_filename = Conf.est_filename
- tair7_seq_filename = Conf.tair7_seq_filename
- tair7_seq = cPickle.load(open(tair7_seq_filename))
+ #tair7_seq_filename = Conf.tair7_seq_filename
+ #tair7_seq = cPickle.load(open(tair7_seq_filename))
+
allGenes = cPickle.load(open(dna_filename))
Sequences = []
Qualities = []
SplitPositions = []
+ # Iterate over all reads
for line in open(est_filename):
line = line.strip()
chr,strand,seq,splitpos,length,prb,cal,chastity,gene_id,p_start,exon_stop,exon_start,p_stop = line.split()
currentGene = allGenes[gene_id]
seq = seq.lower()
- try:
- currentSeq = tair7_seq[gene_id+'.1']['sequence'].lower()
- except:
- continue
+ #try:
+ # currentSeq = tair7_seq[gene_id+'.1']['sequence'].lower()
+ #except:
+ # continue
+
+ genome_file = '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome/'
+ chrom = 'chr1'
+
+ # use matlab-style functions to access dna sequence
+ dna = load_genomic(chrom,'+',currentGene.start,currentGene.stop,genome_file,one_based=True)
+ currentSeq = dna
if len(currentSeq) < (currentGene.stop - currentGene.start):
continue
-
- oldSeq = currentSeq
+ # compare both sequences
+ #assert dna == currentSeq, pdb.set_trace()
+
p_start = int(p_start)
exon_stop = int(exon_stop)
exon_start = int(exon_start)
p_stop = int(p_stop)
- assert p_start <= exon_stop <= exon_start <= p_stop, 'Invalid Indices'
+ assert p_start < exon_stop < exon_start < p_stop, 'Invalid Indices'
+ assert exon_stop - p_start + p_stop - exon_start == 36, 'Invalid read size'
assert p_stop - p_start >= 36
-
+
currentExons = zeros((2,2))
- currentExons[0,0] = p_start-currentGene.start
- currentExons[0,1] = exon_stop-currentGene.start
+ currentExons[0,0] = p_start
+ currentExons[0,1] = exon_stop
- currentExons[1,0] = exon_start-currentGene.start
- currentExons[1,1] = p_stop-currentGene.start
+ currentExons[1,0] = exon_start
+ currentExons[1,1] = p_stop
- import copy
- oldExons = copy.deepcopy(currentExons)
-
+ # make indices relative to start pos of the current gene
+ currentExons -= currentGene.start
+
+ # determine cutting positions for sequences
up_cut = int(currentExons[0,0])
down_cut = int(currentExons[1,1])
+ # if we perform testing we cut a much wider region as we want to detect
+ # how good we perform on a region
if test:
up_cut = up_cut-500
if up_cut < 0:
if down_cut > len(currentSeq):
down_cut = len(currentSeq)
+ # cut out part of the sequence
currentSeq = currentSeq[up_cut:down_cut]
+ # ensure that index is correct
currentExons[0,1] += 1
- #currentExons[1,0] += 1
if test:
currentExons -= up_cut
currentExons -= currentExons[0,0]
try:
- if not (currentSeq[int(currentExons[0,1])] == 'g' and currentSeq[int(currentExons[0,1])+1] == 't'):
+ if not (currentSeq[int(currentExons[0,1])] == 'g' and\
+ currentSeq[int(currentExons[0,1])+1] in ['c','t' ]):
continue
if not (currentSeq[int(currentExons[1,0])-1] == 'g' and currentSeq[int(currentExons[1,0])-2] == 'a'):
Sequences.append(currentSeq)
- currentSize = len(Sequences[-1])
- Acceptors.append([0]*currentSize)
- Donors.append([0]*currentSize)
+ # now we want to use interval_query to get the predicted splice scores
+ # trained on the TAIR sequence and annotation
+
+ from Genefinding import *
+
+ interval_matrix = createIntArrayFromList([currentGene.start+up_cut,currentGene.start+down_cut])
+ num_fields = 1
+ num_intervals = 1
+
+ # fetch acceptor scores
+ fname = '/fml/ag-raetsch/share/projects/genefinding/A_thaliana/jonas_new_genes/exp/sensors/acc/output_SVMWD/pred/contig_1+'
+ gf = Genef()
+ num_hits = gf.query(fname, ['Conf'], num_fields, interval_matrix, num_intervals)
+ assert num_hits <= len(currentSeq)
+
+ #print 'Acceptor hits: %d' % num_hits
+
+ pos = createIntArrayFromList([0]*num_hits)
+ indices = createIntArrayFromList([0]*num_hits)
+ scores = createDoubleArrayFromList([0]*num_hits*num_fields)
+ gf.getResults(pos,indices,scores)
+
+ currentAcceptors = [-inf]*(len(currentSeq)+1)
+
+ for i in range(num_hits):
+ position = pos[i]
+ position -= (currentGene.start+up_cut)
+ assert 0 <= position < len(currentSeq)+1, 'position index wrong'
+ currentAcceptors[position] = scores[i]
+
+ currentAcceptors = currentAcceptors[1:] + [-inf]
+ Acceptors.append(currentAcceptors)
+
+ del gf
+
+ # fetch donor scores
+ fname = '/fml/ag-raetsch/share/projects/genefinding/A_thaliana/jonas_new_genes/exp/sensors/don/output_SVMWD/pred/contig_1+'
+ gf = Genef()
+ num_hits = gf.query(fname, ['Conf'], num_fields, interval_matrix, num_intervals)
+ assert num_hits <= len(currentSeq)
+
+ #print 'Donor hits: %d' % num_hits
+ pos = createIntArrayFromList([0]*num_hits)
+ indices = createIntArrayFromList([0]*num_hits)
+ scores = createDoubleArrayFromList([0]*num_hits*num_fields)
+ gf.getResults(pos,indices,scores)
+
+ currentDonors = [-inf]*(len(currentSeq))
+
+ try:
+ for i in range(1,num_hits):
+ position = pos[i]
+ position -= (currentGene.start+up_cut)
+ currentDonors[position-1] = scores[i]
+ except:
+ pdb.set_trace()
+
+ currentDonors = [-inf] + currentDonors[:-1]
+ Donors.append(currentDonors)
Exons.append(currentExons)
Ests.append(seq)
# Qualities = Qualities[:-1]
# SplitPositions = SplitPositions[:-1]
- print 'found %d examples' % len(Sequences)
+ #print 'found %d examples' % len(Sequences)
return Sequences, Acceptors, Donors, Exons, Ests, Qualities, SplitPositions
train_data = '%s/exons_train_local.pickle' % genome_info.basedir
data = cPickle.load(open(train_data))
- print 'train_data is %s' % train_data
+ #print 'train_data is %s' % train_data
Sequences = data['Train'] # dna sequences
Acceptors = data['TrainAcc'] # acceptor scores
#microexon_train_data = io_pickle.load(train_data_pickle)
data = scipy.io.loadmat(train_data)
- print 'train_data is %s' % train_data
+ #print 'train_data is %s' % train_data
Sequences = data['Train'] # dna sequences
Acceptors = data['TrainAcc'] # acceptor scores
totalQualSP = Configuration.totalQualSuppPoints
numQPlifs = Configuration.numQualPlifs
+ regC = self.numFeatures / 1.0*self.numExamples
+
for j in range(lengthSP-1):
self.P[j,j+1] = -1.0
self.P[j+1,j] = -1.0
self.P[j+1,j] = -1.0
self.P[j,j] += 1.0
+ # 0.25 for each was already good
+
+ lengthGroupParam = 0.005
+ spliceGroupParam = 0.005
+
+ matchGroupParam = 0.495
+ qualityGroupParam = 0.495
+
+ self.P[0:lengthSP,0:lengthSP] *= lengthGroupParam
+
+ beg = lengthSP
+ end = lengthSP+donSP+accSP
+ self.P[beg:end,beg:end] *= spliceGroupParam
+
+ beg = lengthSP+donSP+accSP
+ end = lengthSP+donSP+accSP+mmatrixSP
+ self.P[beg:end,beg:end] *= matchGroupParam
+
+ beg = lengthSP+donSP+accSP+mmatrixSP
+ self.P[beg:-self.numExamples,beg:-self.numExamples] *= qualityGroupParam
+
+ self.P[0:-self.numExamples,0:-self.numExamples] *= regC*1e-3
+
+
def createRegularizer(self):
#self.createUnitRegularizer()
self.createSmoothnessRegularizer()
CPX.setintparam(self.env, CPX_PARAM_SCRIND, CPX_ON) # print >> self.protocol, info to screen
CPX.setintparam(self.env, CPX_PARAM_DATACHECK, CPX_ON)
+ #CPX.setintparam(self.env, CPX_PARAM_QPMETHOD, 2)
# create CPLEX problem, add objective and constraints to it
self.lp = CPX.createprob(self.env, 'test1')
self.matbeg, self.matcnt, self.matind, self.matval,
self.lb, self.ub)
+ import pdb
+ #pdb.set_trace()
+
+ assert sum(self.P[self.numFeatures:,self.numFeatures:]) == 0.0, 'slack variables are regularized'
+
self.qmatbeg, self.qmatcnt, self.qmatind, self.qmatval = self.cpx_matrix(self.P)
CPX.copyquad(self.env, self.lp, self.qmatbeg, self.qmatcnt, self.qmatind, self.qmatval)
print 'Setting parameters ...'
- if min_intron_len == None:
- if train_with_intronlengthinformation:
- min_intron_len=20
- max_intron_len=1000
- else:
- min_intron_len = 1
- max_intron_len = 2
-
- if min_intron_len != None and max_intron_len != None:
- min_svm_score=-5
- max_svm_score=5
+ #if min_intron_len == None:
+ # if train_with_intronlengthinformation:
+ # min_intron_len=20
+ # max_intron_len=1000
+ # else:
+ # min_intron_len = 1
+ # max_intron_len = 2
+ min_intron_len=4
+ max_intron_len=10000
+
+ #if min_intron_len != None and max_intron_len != None:
+ # min_svm_score=-5
+ # max_svm_score=5
+ min_svm_score=-3
+ max_svm_score=1
h = Plf()
d = Plf()
# Gapfunktion
####################
h.limits = logspace(math.log(min_intron_len,10),math.log(max_intron_len,10),30)
+ #h.limits = linspace(min_intron_len,max_intron_len,30)
h.penalties = param[0:lengthSP].flatten().tolist()[0]
#h.transform = '+1'
h.transform = ''
h.name = 'h'
- h.max_len = 100000
- h.min_len = 4
+ h.max_len = max_intron_len
+ h.min_len = min_intron_len
h.id = 1
h.use_svm = 0
h.next_id = 0
-
####################
# Donorfunktion
####################
- d.limits = linspace(min_svm_score,max_svm_score,30)
+ d.limits = linspace(min_svm_score,max_svm_score,30)
d.penalties = param[lengthSP:lengthSP+donSP].flatten().tolist()[0]
#d.transform = '+1'
d.transform = ''
d.name = 'd'
- d.max_len = 100
- d.min_len = -100
+ d.max_len = max_svm_score
+ d.min_len = min_svm_score
d.id = 1
d.use_svm = 0
d.next_id = 0
-
####################
# Acceptorfunktion
####################
- a.limits = linspace(min_svm_score,max_svm_score,30)
+ a.limits = linspace(min_svm_score,max_svm_score,30)
a.penalties = param[lengthSP+donSP:lengthSP+donSP+accSP].flatten().tolist()[0]
#a.transform = '+1'
a.transform = ''
a.name = 'a'
- a.max_len = 100
- a.min_len = -100
+ a.max_len = max_svm_score
+ a.min_len = min_svm_score
a.id = 1
a.use_svm = 0
a.next_id = 0
from qpalma.Plif import Plf
from qpalma.tools.splicesites import getDonAccScores
-
from qpalma.Configuration import *
class para:
#ginfo_filename = 'genome_info.pickle'
#self.genome_info = fetch_genome_info(ginfo_filename)
#self.plog('genome_info.basedir is %s\n'%self.genome_info.basedir)
-
#self.ARGS.train_with_splicesitescoreinformation = False
# Load the whole dataset
else:
assert(False)
-
def plog(self,string):
self.logfh.write(string)
self.logfh.flush()
dna_len = len(dna)
est_len = len(est)
+ # check that splice site scores are at dna positions as expected by
+ # the dynamic programming component
+ for d_pos in [pos for pos,elem in enumerate(donor) if elem != -inf]:
+ assert dna[d_pos] == 'g' and (dna[d_pos+1] == 'c'\
+ or dna[d_pos+1] == 't'), pdb.set_trace()
+
+ for a_pos in [pos for pos,elem in enumerate(acceptor) if elem != -inf]:
+ assert dna[a_pos-1] == 'a' and dna[a_pos] == 'g', pdb.set_trace()
+
ps = h.convert2SWIG()
prb = QPalmaDP.createDoubleArrayFromList(quality)
#cPickle.dump(pa,open('elegans.param','w+'))
self.logfh.close()
-
- def predict(self):
- self.logfh = open('_qpalma_predict.log','w+')
-
+ def evaluate(self,param_filename):
beg = Conf.prediction_begin
end = Conf.prediction_end
+ # predict on training set
+ print '##### Prediction on the training set #####'
+ self.predict(param_filename,0,beg)
+
+ # predict on test set
+ print '##### Prediction on the test set #####'
+ self.predict(param_filename,beg,end)
+
+ pdb.set_trace()
+
+ def predict(self,param_filename,beg,end):
+
+ self.logfh = open('_qpalma_predict.log','w+')
+
Sequences = self.Sequences[beg:end]
Exons = self.Exons[beg:end]
Ests = self.Ests[beg:end]
print_matrix = Conf.print_matrix
anzpath = Conf.anzpath
- param_filename='/fml/ag-raetsch/home/fabio/svn/projects/QPalma/scripts/param_6.pickle'
+ #param_filename='/fml/ag-raetsch/home/fabio/svn/projects/QPalma/scripts/param_25.pickle'
param = load_param(param_filename)
# Set the parameters such as limits penalties for the Plifs
currentPhi = zeros((Conf.numFeatures,1))
totalQualityPenalties = zeros((totalQualSP,1))
- total_up_off = []
- total_down_off = []
+ exon1Begin = []
+ exon1End = []
+ exon2Begin = []
+ exon2End = []
allWrongExons = []
for exampleIdx in range(numExamples):
newWeightMatch = zeros((mm_len,1))
newQualityPlifsFeatures = zeros((Conf.totalQualSuppPoints,1))
- #print 'newSpliceAlign'
for i in range(dna_len):
newSpliceAlign[i] = c_SpliceAlign[i]
- # print '%f' % (spliceAlign[i])
- #print 'newEstAlign'
for i in range(est_len):
newEstAlign[i] = c_EstAlign[i]
- # print '%f' % (spliceAlign[i])
- #print 'weightMatch'
for i in range(mm_len):
newWeightMatch[i] = c_WeightMatch[i]
- # print '%f' % (weightMatch[i])
newDPScores = c_DPScores[0]
for i in range(Conf.totalQualSuppPoints):
newQualityPlifsFeatures[i] = c_qualityPlifsFeatures[i]
- #print "Calling destructors"
del c_SpliceAlign
del c_EstAlign
del c_WeightMatch
newSpliceAlign = newSpliceAlign.reshape(1,dna_len)
newWeightMatch = newWeightMatch.reshape(1,mm_len)
- # Calculate weights of the respective alignments Note that we are
- # calculating n-best alignments without hamming loss, so we
- # have to keep track which of the n-best alignments correspond to
- # the true one in order not to incorporate a true alignment in the
- # constraints. To keep track of the true and false alignments we
- # define an array true_map with a boolean indicating the
- # equivalence to the true alignment for each decoded alignment.
true_map = [0]*2
true_map[0] = 1
pathNr = 0
if norm( allWeights[:,0] - allWeights[:,pathNr+1] ) < 1e-5:
true_map[pathNr+1] = 1
- up_off,down_off,newExons = evaluateExample(dna,est,exons,newSpliceAlign,newEstAlign,currentSplitPos)
- #evaluateExample(dna,est,exons,newSpliceAlign,newEstAlign,currentSplitPos)
- #print up_off,down_off
+ e1_b_off,e1_e_off,e2_b_off,e2_e_off,newExons = evaluateExample(dna,est,exons,newSpliceAlign,newEstAlign,currentSplitPos)
- if up_off > -1:
- total_up_off.append(up_off)
- total_down_off.append(down_off)
+ if e1_b_off != None:
+ exon1Begin.append(e1_b_off)
+ exon1End.append(e1_e_off)
+ exon2Begin.append(e2_b_off)
+ exon2End.append(e2_e_off)
else:
allWrongExons.append((newExons,exons))
- total_up = 0
- total_down = 0
- for idx in range(len(total_up_off)):
- total_up += total_up_off[idx]
- total_down += total_down_off[idx]
-
- total_up /= len(total_up_off)
- total_down /= len(total_down_off)
+ e1Begin_pos,e1Begin_neg,e1End_pos,e1End_neg,mean_e1Begin_neg,mean_e1End_neg = self.evaluatePositions(exon1Begin,exon1End)
+ e2Begin_pos,e2Begin_neg,e2End_pos,e2End_neg,mean_e2Begin_neg,mean_e2End_neg = self.evaluatePositions(exon2Begin,exon2End)
- print 'Mean up_off is %f' % total_up
- print 'Mean down_off is %f' % total_down
- print 'Correct up_off len is %d' % len([elem for elem in total_up_off if elem in [0,1]])
- print 'Correct down_off len is %d' % len([elem for elem in total_down_off if elem in [0,1]])
+ print 'Total num. of Examples: %d' % numExamples
+ print 'Correct positions:\t\t%d\t%d\t%d\t%d' % (len(e1Begin_pos),len(e1End_pos),len(e2Begin_pos),len(e2End_pos))
+ print 'Incorrect positions:\t\t%d\t%d\t%d\t%d' % (len(e1Begin_neg),len(e1End_neg),len(e2Begin_neg),len(e2End_neg))
+ print 'Mean of pos. offset:\t\t%.2f\t%.2f\t%.2f\t%.2f' % (mean_e1Begin_neg,mean_e1End_neg,mean_e2Begin_neg,mean_e2End_neg)
print 'Prediction completed'
+ self.logfh.close()
- pdb.set_trace()
+ def evaluatePositions(self,eBegin,eEnd):
+ eBegin_pos = [elem for elem in eBegin if elem == 0]
+ eBegin_neg = [elem for elem in eBegin if elem != 0]
+ eEnd_pos = [elem for elem in eEnd if elem == 0]
+ eEnd_neg = [elem for elem in eEnd if elem != 0]
+
+ mean_eBegin_neg = 0
+ for idx in range(len(eBegin_neg)):
+ mean_eBegin_neg += eBegin_neg[idx]
+
+ mean_eBegin_neg /= 1.0*len(eBegin_neg)
+
+ mean_eEnd_neg = 0
+ for idx in range(len(eEnd_neg)):
+ mean_eEnd_neg += eEnd_neg[idx]
+
+ mean_eEnd_neg /= 1.0*len(eEnd_neg)
+
+ return eBegin_pos,eBegin_neg,eEnd_pos,eEnd_neg,mean_eBegin_neg,mean_eEnd_neg
- self.logfh.close()
def fetch_genome_info(ginfo_filename):
if not os.path.exists(ginfo_filename):
oldElem = SpliceAlign[pos-1]
if oldElem != 0 and elem == 0: # start of exon
- newExons.append(pos-1)
+ newExons.append(pos)
if oldElem == 0 and elem != 0: # end of exon
newExons.append(pos)
- up_off = -1
- down_off = -1
+ e1_b_off,e1_e_off,e2_b_off,e2_e_off = 0,0,0,0
- if len(newExons) != 4:
- acc = 0.0
- else:
+ if len(newExons) == 4:
e1_begin,e1_end = newExons[0],newExons[1]
e2_begin,e2_end = newExons[2],newExons[3]
- up_off = int(math.fabs(e1_end - exons[0,1]))
- down_off = int(math.fabs(e2_begin - exons[1,0]))
+ #elif len(newExons) > 4 :
+ # e1_begin,e1_end = newExons[0],newExons[1]
+ # e2_begin,e2_end = newExons[-2],newExons[-1]
+ else:
+ return None,None,None,None,newExons
+
+ e1_b_off = int(math.fabs(e1_begin - exons[0,0]))
+ e1_e_off = int(math.fabs(e1_end - exons[0,1]))
+
+ e2_b_off = int(math.fabs(e2_begin - exons[1,0]))
+ e2_e_off = int(math.fabs(e2_end - exons[1,1]))
+
+ return e1_b_off,e1_e_off,e2_b_off,e2_e_off,newExons
+
+def calcStat(Acceptor,Donor,Exons):
+ maxAcc = -100
+ minAcc = 100
+ maxDon = -100
+ minDon = 100
+
+ acc_vec_pos = []
+ acc_vec_neg = []
+ don_vec_pos = []
+ don_vec_neg = []
+
+ for jdx in range(len(Acceptors)):
+ currentExons = Exons[jdx]
+ currentAcceptor = Acceptors[jdx]
+ currentAcceptor = currentAcceptor[1:]
+ currentAcceptor.append(-inf)
+ currentDonor = Donors[jdx]
+
+ for idx,elem in enumerate(currentAcceptor):
+ if idx == (int(currentExons[1,0])-1): # acceptor site
+ acc_vec_pos.append(elem)
+ else:
+ acc_vec_neg.append(elem)
+
+ for idx,elem in enumerate(currentDonor):
+ if idx == (int(currentExons[0,1])): # donor site
+ don_vec_pos.append(elem)
+ else:
+ don_vec_neg.append(elem)
- return up_off,down_off,newExons
+ acc_pos = [elem for elem in acc_vec_pos if elem != -inf]
+ acc_neg = [elem for elem in acc_vec_neg if elem != -inf]
+ don_pos = [elem for elem in don_vec_pos if elem != -inf]
+ don_neg = [elem for elem in don_vec_neg if elem != -inf]
+ pdb.set_trace()
+
+ for idx in range(len(Sequences)):
+ acc = [elem for elem in Acceptors[idx] if elem != -inf]
+ maxAcc = max(max(acc),maxAcc)
+ minAcc = min(min(acc),minAcc)
+
+ don = [elem for elem in Donors[idx] if elem != -inf]
+ maxDon = max(max(don),maxDon)
+ minDon = min(min(don),minDon)
if __name__ == '__main__':
qpalma = QPalma()
- if len(sys.argv) != 2:
- print 'You have to choose between training or prediction mode:'
- print 'python qpalma. py (train|predict)'
- mode = sys.argv[1]
- assert mode in ['train','predict']
-
- if mode == 'train':
+ if len(sys.argv) == 2:
+ mode = sys.argv[1]
+ assert mode == 'train'
qpalma.train()
-
- if mode == 'predict':
- qpalma.predict()
+
+ elif len(sys.argv) == 3:
+ mode = sys.argv[1]
+ param_filename = sys.argv[2]
+ assert mode == 'predict'
+ assert os.path.exists(param_filename)
+ qpalma.evaluate(param_filename)
+ else:
+ print 'You have to choose between training or prediction mode:'
+ print 'python qpalma. py (train|predict) <param_file>'