self.logfh.write(string)
self.logfh.flush()
- def train(self):
+ def train(self,value):
self.logfh = open('_qpalma_train.log','w+')
beg = Conf.training_begin
i3 = range(lengthSP+donSP,lengthSP+donSP+accSP)
i4 = range(lengthSP+donSP+accSP,lengthSP+donSP+accSP+mmatrixSP)
i5 = range(lengthSP+donSP+accSP+mmatrixSP,lengthSP+donSP+accSP+mmatrixSP+totalQualSP)
- intervals = [] #i5,\ #i2,#i3,#i4,#i5]
+
+ offset =lengthSP+donSP+accSP+mmatrixSP
+ #intervals = [[offset+2,offset+3]] #i5,\ #i2,#i3,#i4,#i5]
+ intervals = []
+
+ #param[offset+2] = 10.0
+ #param[offset+3] = 10.0
+
zero_out(param,intervals)
- pdb.set_trace()
+
+ #pdb.set_trace()
# Set the parameters such as limits penalties for the Plifs
[h,d,a,mmatrix,qualityPlifs] = set_param_palma(param,self.ARGS.train_with_intronlengthinformation)
dna = Sequences[exampleIdx]
est = Ests[exampleIdx]
est = "".join(est)
+ est = est.lower()
if Conf.mode == 'normal':
quality = [40]*len(est)
if Conf.mode == 'using_quality_scores':
quality = Qualities[exampleIdx]
+ #quality = [(int(math.fabs(e))) for e in quality]
+ #quality = [40]*len(est)
+
exons = Exons[exampleIdx]
don_supp = Donors[exampleIdx]
acc_supp = Acceptors[exampleIdx]
+ #for idx,elem in enumerate(don_supp):
+ # if elem != -inf:
+ # don_supp[idx] = 0.0
+
+ #for idx,elem in enumerate(acc_supp):
+ # if elem != -inf:
+ # acc_supp[idx] = 0.0
+
# Compute the parameters of the true alignment (but with untrained d,a,h ...)
if Conf.mode == 'using_quality_scores':
trueSpliceAlign, trueWeightMatch, trueWeightQuality =\
zero_out(currentPhi,intervals)
zero_out(trueWeight,intervals)
- #pdb.set_trace()
-
# Calculate w'phi(x,y) the total score of the alignment
trueAlignmentScore = (trueWeight.T * currentPhi)[0,0]
# check that splice site scores are at dna positions as expected by
# the dynamic programming component
+
for d_pos in [pos for pos,elem in enumerate(donor) if elem != -inf]:
assert dna[d_pos] == 'g' and (dna[d_pos+1] == 'c'\
or dna[d_pos+1] == 't'), pdb.set_trace()
zero_out(features,intervals)
zero_out(allWeights[:,pathNr+1],intervals)
- #pdb.set_trace()
-
AlignmentScores[pathNr+1] = (allWeights[:,pathNr+1].T * features)[0,0]
+ #pdb.set_trace()
+
distinct_scores = False
if math.fabs(AlignmentScores[pathNr] - AlignmentScores[pathNr+1]) > 1e-5:
distinct_scores = True
if not math.fabs(newDPScores[pathNr,0] - AlignmentScores[pathNr+1]) <= 1e-5:
pdb.set_trace()
- if exampleIdx == 1:
- self.plog(" scalar prod (correct) : %f\n"%AlignmentScores[0])
- self.plog(" scalar prod (pred.) : %f %f\n"%(newDPScores[pathNr,0],AlignmentScores[pathNr+1]))
+ self.plog(" scalar prod (correct) : %f\n"%AlignmentScores[0])
+ self.plog(" scalar prod (pred.) : %f %f\n"%(newDPScores[pathNr,0],AlignmentScores[pathNr+1]))
- pdb.set_trace()
+ #pdb.set_trace()
# if the pathNr-best alignment is very close to the true alignment consider it as true
if norm( allWeights[:,0] - allWeights[:,pathNr+1] ) < 1e-5:
true_map[pathNr+1] = 1
- #pdb.set_trace()
#assert AlignmentScores[0] <= max(AlignmentScores[1:]) + 1e-6, pdb.set_trace()
if not trueAlignmentScore <= max(AlignmentScores[1:]) + 1e-6:
+ print "suboptimal_example %d\n" %exampleIdx
+ trueSpliceAlign, trueWeightMatch, trueWeightQuality =\
+ computeSpliceAlignWithQuality(dna, exons, est, quality, qualityPlifs)
+
+ pdb.set_trace()
suboptimal_example += 1
self.plog("suboptimal_example %d\n" %exampleIdx)
-
# the true label sequence should not have a larger score than the maximal one (open question: why does this happen?)
# this means that all n-best paths are too close to each other
# we have to extend the n-best search to an (n+1)-best
firstFalseIdx = map_idx
break
- #if exampleIdx == 1:
if False:
-
self.plog("Is considered as: %d\n" % true_map[1])
result_len = currentAlignment.getResultLength()
# if there is at least one useful false alignment add the
# corresponding constraints to the optimization problem
if firstFalseIdx != -1:
- trueWeights = trueWeight
firstFalseWeights = allWeights[:,firstFalseIdx]
- differenceVector = trueWeights - firstFalseWeights
+ differenceVector = trueWeight - firstFalseWeights
#pdb.set_trace()
if Conf.USE_OPT:
sum_xis += elem
print 'sum of slacks is %f'% sum_xis
+ self.plog('sum of slacks is %f\n'% sum_xis)
for i in range(len(param)):
param[i] = w[i]
if Conf.mode == 'using_quality_scores':
quality = Qualities[exampleIdx]
+ #quality = [40]*len(est)
+
exons = Exons[exampleIdx]
# NoiseMatrix = Noises[exampleIdx]
don_supp = Donors[exampleIdx]
acc_supp = Acceptors[exampleIdx]
- # Berechne die Parameter des wirklichen Alignments (but with untrained d,a,h ...)
- if Conf.mode == 'using_quality_scores':
- trueSpliceAlign, trueWeightMatch, trueWeightQuality =\
- computeSpliceAlignWithQuality(dna, exons, est, quality, qualityPlifs)
- else:
- trueSpliceAlign, trueWeightMatch, trueWeightQuality = computeSpliceAlignWithQuality(dna, exons)
-
- # Calculate the weights
- trueWeightDon, trueWeightAcc, trueWeightIntron = computeSpliceWeights(d, a, h, trueSpliceAlign, don_supp, acc_supp)
- trueWeight = numpy.vstack([trueWeightIntron, trueWeightDon, trueWeightAcc, trueWeightMatch, trueWeightQuality])
+ # for idx,elem in enumerate(don_supp):
+ # if elem != -inf:
+ # don_supp[idx] = 0.0
+
+ # for idx,elem in enumerate(acc_supp):
+ # if elem != -inf:
+ # acc_supp[idx] = 0.0
+
+ ## Compute the parameters of the true alignment (but with untrained d,a,h ...)
+ #if Conf.mode == 'using_quality_scores':
+ # trueSpliceAlign, trueWeightMatch, trueWeightQuality =\
+ # computeSpliceAlignWithQuality(dna, exons, est, quality, qualityPlifs)
+ #else:
+ # trueSpliceAlign, trueWeightMatch, trueWeightQuality = computeSpliceAlignWithQuality(dna, exons)
+ #
+ ## Calculate the weights
+ #trueWeightDon, trueWeightAcc, trueWeightIntron = computeSpliceWeights(d, a, h, trueSpliceAlign, don_supp, acc_supp)
+ #trueWeight = numpy.vstack([trueWeightIntron, trueWeightDon, trueWeightAcc, trueWeightMatch, trueWeightQuality])
+
+ #if Conf.mode == 'using_quality_scores':
+ # totalQualityPenalties = param[-totalQualSP:]
+ # currentPhi[donSP+accSP+lengthSP+mmatrixSP:] = totalQualityPenalties[:]
currentPhi[0:donSP] = mat(d.penalties[:]).reshape(donSP,1)
currentPhi[donSP:donSP+accSP] = mat(a.penalties[:]).reshape(accSP,1)
currentPhi[donSP+accSP:donSP+accSP+lengthSP] = mat(h.penalties[:]).reshape(lengthSP,1)
currentPhi[donSP+accSP+lengthSP:donSP+accSP+lengthSP+mmatrixSP] = mmatrix[:]
- if Conf.mode == 'using_quality_scores':
- totalQualityPenalties = param[-totalQualSP:]
- currentPhi[donSP+accSP+lengthSP+mmatrixSP:] = totalQualityPenalties[:]
-
# Calculate w'phi(x,y) the total score of the alignment
- trueAlignmentScore = (trueWeight.T * currentPhi)[0,0]
+ #trueAlignmentScore = (trueWeight.T * currentPhi)[0,0]
# The allWeights vector is supposed to store the weight parameter
# of the true alignment as well as the weight parameters of the
# 1 other alignments
- allWeights = zeros((Conf.numFeatures,1+1))
- allWeights[:,0] = trueWeight[:,0]
+ #allWeights = zeros((Conf.numFeatures,1+1))
+ #allWeights[:,0] = trueWeight[:,0]
- AlignmentScores = [0.0]*(1+1)
- AlignmentScores[0] = trueAlignmentScore
+ #AlignmentScores = [0.0]*(1+1)
+ #AlignmentScores[0] = trueAlignmentScore
################## Calculate wrong alignment(s) ######################
dna_len = len(dna)
est_len = len(est)
+
ps = h.convert2SWIG()
prb = QPalmaDP.createDoubleArrayFromList(quality)
self.plog(line2+'\n')
self.plog(line3+'\n')
- weightDon, weightAcc, weightIntron = computeSpliceWeights(d, a, h, newSpliceAlign.flatten().tolist()[0], don_supp, acc_supp)
+ #weightDon, weightAcc, weightIntron = computeSpliceWeights(d, a, h, newSpliceAlign.flatten().tolist()[0], don_supp, acc_supp)
- decodedQualityFeatures = zeros((Conf.totalQualSuppPoints,1))
- for qidx in range(Conf.totalQualSuppPoints):
- decodedQualityFeatures[qidx] = newQualityPlifsFeatures[qidx]
+ #decodedQualityFeatures = zeros((Conf.totalQualSuppPoints,1))
+ #for qidx in range(Conf.totalQualSuppPoints):
+ # decodedQualityFeatures[qidx] = newQualityPlifsFeatures[qidx]
# Store the weights in the remaining columns of the matrix
- wp = numpy.vstack([weightIntron, weightDon, weightAcc, newWeightMatch.T, decodedQualityFeatures])
- allWeights[:,pathNr+1] = wp
+ #wp = numpy.vstack([weightIntron, weightDon, weightAcc, newWeightMatch.T, decodedQualityFeatures])
+ #allWeights[:,pathNr+1] = wp
- hpen = mat(h.penalties).reshape(len(h.penalties),1)
- dpen = mat(d.penalties).reshape(len(d.penalties),1)
- apen = mat(a.penalties).reshape(len(a.penalties),1)
- features = numpy.vstack([hpen, dpen, apen, mmatrix[:], totalQualityPenalties])
- AlignmentScores[pathNr+1] = (allWeights[:,pathNr+1].T * features)[0,0]
-
- # if the pathNr-best alignment is very close to the true alignment consider it as true
- if norm( allWeights[:,0] - allWeights[:,pathNr+1] ) < 1e-5:
- true_map[pathNr+1] = 1
+ #hpen = mat(h.penalties).reshape(len(h.penalties),1)
+ #dpen = mat(d.penalties).reshape(len(d.penalties),1)
+ #apen = mat(a.penalties).reshape(len(a.penalties),1)
+ #features = numpy.vstack([hpen, dpen, apen, mmatrix[:], totalQualityPenalties])
+ #AlignmentScores[pathNr+1] = (allWeights[:,pathNr+1].T * features)[0,0]
e1_b_off,e1_e_off,e2_b_off,e2_e_off,newExons = self.evaluateExample(dna,est,exons,newSpliceAlign,newEstAlign,currentSplitPos,exampleIdx)
if __name__ == '__main__':
qpalma = QPalma()
- if len(sys.argv) == 2:
- mode = sys.argv[1]
- assert mode == 'train'
- qpalma.train()
+ mode = sys.argv[1]
+
+ if len(sys.argv) == 3 and mode == 'train':
+ qpalma.train(int(sys.argv[2]))
- elif len(sys.argv) == 3:
- mode = sys.argv[1]
+ elif len(sys.argv) == 3 and mode == 'predict':
param_filename = sys.argv[2]
- assert mode == 'predict'
assert os.path.exists(param_filename)
qpalma.evaluate(param_filename)
else: