fixedParamQ = numpy.matlib.mat(
[[ 0.62870709], [ 0.7012026 ], [ 0.60236784],
[ 0.51316259], [ 0.20220814], [ 0.70324863], [ 0.37218684], [ 0.82178927],
+ [ 0.51316259], [ 0.20220814], [ 0.70324863], [ 0.37218684], [ 0.82178927],
+ [ 0.60394866], [ 0.70371272], [ 0.07548074], [ 0.63412803], [ 0.97442266],
+ [ 0.13216791], [ 0.71041168], [ 0.2093887 ], [ 0.35227344], [ 0.3405142 ],
+ [ 0.69677236], [ 0.41673747], [ 0.564245 ], [ 0.37613432], [ 0.88805642],
+ [ 0.69677236], [ 0.41673747], [ 0.564245 ], [ 0.37613432], [ 0.88805642],
+ [ 0.69677236], [ 0.41673747], [ 0.564245 ], [ 0.37613432], [ 0.88805642],
+ [ 0.88691608], [ 0.69476752], [ 0.81659504], [ 0.17801859], [ 0.71048235],
+ [ 0.08188783], [ 0.54884803], [ 0.84039558], [ 0.6982093 ], [ 0.41686176],
+ [ 0.38568873], [ 0.29401347], [ 0.12704074], [ 0.30640858], [ 0.89578031],
+ [ 0.84621571], [ 0.11783439], [ 0.0944695 ], [ 0.34081575], [ 0.44157643],
+ [ 0.77847185], [ 0.04283567], [ 0.45107823], [ 0.89789891], [ 0.41045519],
+ [ 0.49073531], [ 0.29727627], [ 0.94711483], [ 0.24898204], [ 0.26181212],
+ [ 0.71760957], [ 0.60326883], [ 0.80887576], [ 0.09448718], [ 0.88064525],
+ [ 0.84317654], [ 0.48893703], [ 0.24847021], [ 0.84203596], [ 0.34104156],
+ [ 0.75604701], [ 0.91703057], [ 0.69325475], [ 0.61276969], [ 0.16335226],
+ [ 0.4684374 ], [ 0.16553371], [ 0.79594434], [ 0.6440283 ], [ 0.80922237],
+ [ 0.5349296 ], [ 0.31924316], [ 0.10960695], [ 0.40151062], [ 0.50473641],
+ [ 0.14812671], [ 0.73523169], [ 0.35141625], [ 0.80364238], [ 0.02128181],
+ [ 0.0061226 ], [ 0.34541924], [ 0.07694485], [ 0.05551339], [ 0.23087636],
+ [ 0.87016395], [ 0.31682377], [ 0.27375113], [ 0.72226332], [ 0.62914149],
+ [ 0.59236012], [ 0.2070238 ], [ 0.52390942], [ 0.11894098], [ 0.55725917],
+ [ 0.72706009], [ 0.087196 ], [ 0.04745082], [ 0.95636492], [ 0.31524576],
+ [ 0.79685218], [ 0.80386771], [ 0.70942604], [ 0.82869417], [ 0.26906569],
+ [ 0.51848039], [ 0.64169354], [ 0.07114973], [ 0.39249454], [ 0.07002803],
+ [ 0.94667567], [ 0.02252752], [ 0.01039039], [ 0.5721312 ], [ 0.06065969],
+ [ 0.88691608], [ 0.69476752], [ 0.81659504], [ 0.17801859], [ 0.71048235],
+ [ 0.88691608], [ 0.69476752], [ 0.81659504], [ 0.17801859], [ 0.71048235],
+ [ 0.08188783], [ 0.54884803], [ 0.84039558], [ 0.6982093 ], [ 0.41686176],
+ [ 0.38568873], [ 0.29401347], [ 0.12704074], [ 0.30640858], [ 0.89578031],
+ [ 0.84621571], [ 0.11783439], [ 0.0944695 ], [ 0.34081575], [ 0.44157643],
+ [ 0.77847185], [ 0.04283567], [ 0.45107823], [ 0.89789891], [ 0.41045519],
+ [ 0.49073531], [ 0.29727627], [ 0.94711483], [ 0.24898204], [ 0.26181212],
+ [ 0.71760957], [ 0.60326883], [ 0.80887576], [ 0.09448718], [ 0.88064525],
+ [ 0.84317654], [ 0.48893703], [ 0.24847021], [ 0.84203596], [ 0.34104156],
+ [ 0.75604701], [ 0.91703057], [ 0.69325475], [ 0.61276969], [ 0.16335226],
+ [ 0.4684374 ], [ 0.16553371], [ 0.79594434], [ 0.6440283 ], [ 0.80922237],
+ [ 0.5349296 ], [ 0.31924316], [ 0.10960695], [ 0.40151062], [ 0.50473641],
+ [ 0.14812671], [ 0.73523169], [ 0.35141625], [ 0.80364238], [ 0.02128181],
+ [ 0.0061226 ], [ 0.34541924], [ 0.07694485], [ 0.05551339], [ 0.23087636],
+ [ 0.87016395], [ 0.31682377], [ 0.27375113], [ 0.72226332], [ 0.62914149],
+ [ 0.59236012], [ 0.2070238 ], [ 0.52390942], [ 0.11894098], [ 0.55725917],
+ [ 0.72706009], [ 0.087196 ], [ 0.04745082], [ 0.95636492], [ 0.31524576],
+ [ 0.79685218], [ 0.80386771], [ 0.70942604], [ 0.82869417], [ 0.26906569],
+ [ 0.51848039], [ 0.64169354], [ 0.07114973], [ 0.39249454], [ 0.07002803],
+ [ 0.94667567], [ 0.02252752], [ 0.01039039], [ 0.5721312 ], [ 0.06065969],
+ [ 0.88691608], [ 0.69476752], [ 0.81659504], [ 0.17801859], [ 0.71048235],
+ [ 0.08188783], [ 0.54884803], [ 0.84039558], [ 0.6982093 ], [ 0.41686176],
+ [ 0.38568873], [ 0.29401347], [ 0.12704074], [ 0.30640858], [ 0.89578031],
+ [ 0.84621571], [ 0.11783439], [ 0.0944695 ], [ 0.34081575], [ 0.44157643],
+ [ 0.84621571], [ 0.11783439], [ 0.0944695 ], [ 0.34081575], [ 0.44157643],
+ [ 0.77847185], [ 0.04283567], [ 0.45107823], [ 0.89789891], [ 0.41045519],
+ [ 0.49073531], [ 0.29727627], [ 0.94711483], [ 0.24898204], [ 0.26181212],
+ [ 0.71760957], [ 0.60326883], [ 0.80887576], [ 0.09448718], [ 0.88064525],
+ [ 0.84317654], [ 0.48893703], [ 0.24847021], [ 0.84203596], [ 0.34104156],
+ [ 0.75604701], [ 0.91703057], [ 0.69325475], [ 0.61276969], [ 0.16335226],
+ [ 0.4684374 ], [ 0.16553371], [ 0.79594434], [ 0.6440283 ], [ 0.80922237],
+ [ 0.5349296 ], [ 0.31924316], [ 0.10960695], [ 0.40151062], [ 0.50473641],
+ [ 0.14812671], [ 0.73523169], [ 0.35141625], [ 0.80364238], [ 0.02128181],
+ [ 0.0061226 ], [ 0.34541924], [ 0.07694485], [ 0.05551339], [ 0.23087636],
+ [ 0.87016395], [ 0.31682377], [ 0.27375113], [ 0.72226332], [ 0.62914149],
+ [ 0.59236012], [ 0.2070238 ], [ 0.52390942], [ 0.11894098], [ 0.55725917],
+ [ 0.72706009], [ 0.087196 ], [ 0.04745082], [ 0.95636492], [ 0.31524576],
+ [ 0.79685218], [ 0.80386771], [ 0.70942604], [ 0.82869417], [ 0.26906569],
+ [ 0.51848039], [ 0.64169354], [ 0.07114973], [ 0.39249454], [ 0.07002803],
+ [ 0.94667567], [ 0.02252752], [ 0.01039039], [ 0.5721312 ], [ 0.06065969],
+ [ 0.69422476], [ 0.4310939 ], [ 0.03069099], [ 0.35969779], [ 0.18047331],
[ 0.60394866], [ 0.70371272], [ 0.07548074], [ 0.63412803], [ 0.97442266],
[ 0.13216791], [ 0.71041168], [ 0.2093887 ], [ 0.35227344], [ 0.3405142 ],
[ 0.69677236], [ 0.41673747], [ 0.564245 ], [ 0.37613432], [ 0.88805642],
#
#
-C = 1.0
+C = 1000.0
# 'normal' means work like Palma 'using_quality_scores' means work like Palma
# plus using sequencing quality scores
numDonSuppPoints = 30
numAccSuppPoints = 30
numLengthSuppPoints = 30
-numQualSuppPoints = 2
+numQualSuppPoints = 16
min_qual = -1
max_qual = 40
else:
assert False, 'Wrong operation mode specified'
-
#
#
#
est_filename = '/fml/ag-raetsch/share/projects/qpalma/solexa/filteredReads_recent'
tair7_seq_filename = '/fml/ag-raetsch/share/projects/qpalma/solexa/tair7_seq.pickle'
-
# Check for valid parameters
assert numQualPlifs >= 0
assert numDonSuppPoints > 1
use_quality_scores = False
elif Conf.mode == 'using_quality_scores':
- Sequences, Acceptors, Donors, Exons, Ests, Qualities, SplitPos = paths_load_data_solexa('training',None,self.ARGS)
-
- end = 30
- Sequences = Sequences[:end]
- Acceptors = Acceptors[:end]
- Donors = Donors[:end]
- Exons = Exons[:end]
- Ests = Ests[:end]
- Qualities = Qualities[:end]
- SplitPos = SplitPos[:end]
-
- #Sequences, Acceptors, Donors, Exons, Ests, Qualities = loadArtificialData(1000)
+ #Sequences, Acceptors, Donors, Exons, Ests, Qualities, SplitPos = paths_load_data_solexa('training',None,self.ARGS)
+ #end = 50
+ #Sequences = Sequences[:end]
+ #Acceptors = Acceptors[:end]
+ #Donors = Donors[:end]
+ #Exons = Exons[:end]
+ #Ests = Ests[:end]
+ #Qualities = Qualities[:end]
+ #SplitPos = SplitPos[:end]
+
+ Sequences, Acceptors, Donors, Exons, Ests, Qualities = loadArtificialData(1000)
+ SplitPos = [1]*len(Qualities)
#Sequences, Acceptors, Donors, Exons, Ests, Noises = paths_load_data_pickle('training',self.genome_info,self.ARGS)
#pdb.set_trace()
#Qualities = []
# Initialize parameter vector / param = numpy.matlib.rand(126,1)
#param_filename = '/fml/ag-raetsch/home/fabio/svn/projects/QPalma/python/elegans.param'
- param_filename='/fml/ag-raetsch/home/fabio/svn/projects/QPalma/scripts/param_20.pickle'
+ param_filename='/fml/ag-raetsch/home/fabio/svn/projects/QPalma/scripts/param_30.pickle'
param = load_param(param_filename)
# Set the parameters such as limits penalties for the Plifs
#pdb.set_trace()
- up_off,down_off = evaluateExample(dna,est,exons,newSpliceAlign,newEstAlign,currentSplitPos)
- print up_off,down_off
-
- if up_off > -1:
- total_up_off.append(up_off)
- total_down_off.append(down_off)
-
- total_up = 0
- total_down = 0
- for idx in range(len(total_up_off)):
- total_up += total_up_off[idx]
- total_down += total_down_off[idx]
-
- total_up /= len(total_up_off)
- total_down /= len(total_down_off)
-
- print 'Mean up_off is %f' % total_up
- print 'Mean down_off is %f' % total_down
- #print total_up_off
- #print total_down_off
- print 'len is %d' % len(total_up_off)
+ #up_off,down_off = evaluateExample(dna,est,exons,newSpliceAlign,newEstAlign,currentSplitPos)
+ evaluateExample(dna,est,exons,newSpliceAlign,newEstAlign,currentSplitPos)
+ #print up_off,down_off
+
+ #if up_off > -1:
+ # total_up_off.append(up_off)
+ # total_down_off.append(down_off)
+
+ #total_up = 0
+ #total_down = 0
+ #for idx in range(len(total_up_off)):
+ # total_up += total_up_off[idx]
+ # total_down += total_down_off[idx]
+ #
+ #total_up /= len(total_up_off)
+ #total_down /= len(total_down_off)
+
+ #print 'Mean up_off is %f' % total_up
+ #print 'Mean down_off is %f' % total_down
+ ##print total_up_off
+ ##print total_down_off
+ #print 'len is %d' % len(total_up_off)
print 'Prediction completed'
self.logfh.close()
else:
oldElem = SpliceAlign[pos-1]
- if oldElem != 0 and elem == 0:
- newExons.append(pos)
-
- if oldElem == 0 and elem != 0:
+ if oldElem == 2 and elem == 0: # start of exon
newExons.append(pos-1)
+ if oldElem == 0 and elem == 1: # end of exon
+ newExons.append(pos)
+
pdb.set_trace()
- up_off = -1
- down_off = -1
+ #up_off = -1
+ #down_off = -1
- if len(newExons) != 4:
- acc = 0.0
- else:
- e1_begin,e1_end = newExons[0],newExons[1]
- e2_begin,e2_end = newExons[2],newExons[3]
+ #if len(newExons) != 4:
+ # acc = 0.0
+ #else:
+ # e1_begin,e1_end = newExons[0],newExons[1]
+ # e2_begin,e2_end = newExons[2],newExons[3]
- up_off = int(math.fabs(e1_end - exons[0,1]))
- down_off = int(math.fabs(e2_begin - exons[1,0]))
+ # up_off = int(math.fabs(e1_end - exons[0,1]))
+ # down_off = int(math.fabs(e2_begin - exons[1,0]))
- return up_off,down_off
+ #return up_off,down_off
if __name__ == '__main__':
qpalma = QPalma()
use_quality_scores = False
elif Configuration.mode == 'using_quality_scores':
- Sequences, Acceptors, Donors, Exons, Ests, Qualities, SplitPos = paths_load_data_solexa('training',None,self.ARGS)
+ #Sequences, Acceptors, Donors, Exons, Ests, Qualities, SplitPos = paths_load_data_solexa('training',None,self.ARGS)
- end = 50
- Sequences = Sequences[:end]
- Exons = Exons[:end]
- Ests = Ests[:end]
- Qualities = Qualities[:end]
- SplitPos = SplitPos[:end]
+ #end = 50
+ #Sequences = Sequences[:end]
+ #Exons = Exons[:end]
+ #Ests = Ests[:end]
+ #Qualities = Qualities[:end]
+ #SplitPos = SplitPos[:end]
- Donors, Acceptors = getDonAccScores(Sequences)
-
- #Sequences, Acceptors, Donors, Exons, Ests, Qualities = loadArtificialData(1000)
+ #Donors, Acceptors = getDonAccScores(Sequences)
- #Sequences_, Acceptors_, Donors_, Exons_, Ests_, Qualities_ = generateData(100)
+ Sequences, Acceptors, Donors, Exons, Ests, Qualities = loadArtificialData(1000)
#Sequences, Acceptors, Donors, Exons, Ests, Noises = paths_load_data_pickle('training',self.genome_info,self.ARGS)
#Qualities = []
if exons[-1,1] > len(dna):
continue
- pdb.set_trace()
+ #pdb.set_trace()
# Berechne die Parameter des wirklichen Alignments (but with untrained d,a,h ...)
trueSpliceAlign, trueWeightMatch, trueWeightQuality = computeSpliceAlignWithQuality(dna, exons)
- print 'trueWeights'
+ #print 'trueWeights'
# Calculate the weights
trueWeightDon, trueWeightAcc, trueWeightIntron = computeSpliceWeights(d, a, h, trueSpliceAlign, don_supp, acc_supp)
trueWeight = numpy.vstack([trueWeightIntron, trueWeightDon, trueWeightAcc, trueWeightMatch, trueWeightQuality])
currentPhi[donSP+accSP:donSP+accSP+lengthSP] = mat(h.penalties[:]).reshape(lengthSP,1)
currentPhi[donSP+accSP+lengthSP:donSP+accSP+lengthSP+mmatrixSP] = mmatrix[:]
+ #pdb.set_trace()
if Configuration.mode == 'using_quality_scores':
totalQualityPenalties = param[-totalQualSP:]
currentPhi[donSP+accSP+lengthSP+mmatrixSP:] = totalQualityPenalties[:]
currentAlignment = QPalmaDP.Alignment(Configuration.numQualPlifs,Configuration.numQualSuppPoints, use_quality_scores)
c_qualityPlifs = QPalmaDP.createPenaltyArrayFromList([elem.convert2SWIG() for elem in qualityPlifs])
-
#print 'Calling myalign...'
# calculates SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest
currentAlignment.myalign( num_path[exampleIdx], dna, dna_len,\
path_loss = [0]*(num_path[exampleIdx])
for pathNr in range(num_path[exampleIdx]):
- print 'decodedWeights'
+ #print 'decodedWeights'
weightDon, weightAcc, weightIntron = computeSpliceWeights(d, a,
h, newSpliceAlign[pathNr,:].flatten().tolist()[0], don_supp,
acc_supp,True)