#Acceptors.append([-inf]*currentSize)
#Donors.append([-inf]*currentSize)
- exon_idx = int(exon_idx)-1
+ exon_idx = int(exon_idx)
+
+ assert exon_idx > 0
#print "%d " % exon_idx,
currentExons = zeros((2,2))
#for idx in range(len(currentGene.exons)):
up_cut = int(currentExons[0,0]) - cut_offset
down_cut = int(currentExons[1,1]) + cut_offset
-
if up_cut < 0:
up_cut = 0
if down_cut > len(currentSeq):
down_cut = len(currentSeq)
+ assert down_cut - up_cut > 0
+ assert down_cut - up_cut <= len(currentSeq)
+
currentSeq = currentSeq[up_cut:down_cut]
+ #pdb.set_trace()
+
if up_cut > 0:
currentExons[0,0] -= up_cut
currentExons[0,1] -= up_cut
currentExons[1,0] -= up_cut
currentExons[1,1] -= up_cut
-
except:
- pdb.set_trace()
+ continue
+ #pdb.set_trace()
+ #pdb.set_trace()
Sequences.append(currentSeq)
currentSize = len(Sequences[-1])
#Sequences, Acceptors, Donors, Exons, Ests, Qualities = loadArtificialData(1000)
#Sequences, Acceptors, Donors, Exons, Ests, Noises = paths_load_data_pickle('training',self.genome_info,self.ARGS)
- pdb.set_trace()
+ #pdb.set_trace()
#Qualities = []
#for i in range(len(Ests)):
# Qualities.append([40]*len(Ests[i]))
# Initialize parameter vector / param = numpy.matlib.rand(126,1)
#param_filename = '/fml/ag-raetsch/home/fabio/svn/projects/QPalma/python/elegans.param'
- param_filename='/fml/ag-raetsch/home/fabio/svn/projects/QPalma/scripts/param_16.pickle'
+ param_filename='/fml/ag-raetsch/home/fabio/svn/projects/QPalma/scripts/param_82.pickle'
param = load_param(param_filename)
# Set the parameters such as limits penalties for the Plifs
currentPhi = zeros((Conf.numFeatures,1))
totalQualityPenalties = zeros((totalQualSP,1))
- for exampleIdx in range(self.numExamples):
+ total_up_off = []
+ total_down_off = []
+
+ #for exampleIdx in range(self.numExamples):
+ for exampleIdx in range(200):
dna = Sequences[exampleIdx]
est = Ests[exampleIdx]
AlignmentScores[pathNr+1] = (allWeights[:,pathNr+1].T * features)[0,0]
# Check wether scalar product + loss equals viterbi score
- print '%f vs %f' % (newDPScores, AlignmentScores[pathNr+1])
+ print '%f vs. %f' % (newDPScores, AlignmentScores[pathNr+1])
# if the pathNr-best alignment is very close to the true alignment consider it as true
if norm( allWeights[:,0] - allWeights[:,pathNr+1] ) < 1e-5:
true_map[pathNr+1] = 1
- pdb.set_trace()
+ #pdb.set_trace()
- evaluateExample(dna,est,exons,newSpliceAlign,newEstAlign,currentSplitPos)
+ up_off,down_off = evaluateExample(dna,est,exons,newSpliceAlign,newEstAlign,currentSplitPos)
+
+ if up_off > -1:
+ total_up_off.append(up_off)
+ total_down_off.append(down_off)
+
+ total_up = 0
+ total_down = 0
+ for idx in range(len(total_up_off)):
+ total_up += total_up_off[idx]
+ total_down += total_down_off[idx]
+
+ total_up /= len(total_up_off)
+ total_down /= len(total_down_off)
+
+ print 'Mean up_off is %f' % total_up
+ print 'Mean down_off is %f' % total_down
+ #print total_up_off
+ #print total_down_off
+ print 'len is %d' % len(total_up_off)
print 'Prediction completed'
self.logfh.close()
if oldElem == 0 and elem != 0:
newExons.append(pos-1)
- pdb.set_trace()
+ #pdb.set_trace()
+ up_off = -1
+ down_off = -1
+
+ if len(newExons) != 4:
+ acc = 0.0
+ else:
+ e1_begin,e1_end = newExons[0],newExons[1]
+ e2_begin,e2_end = newExons[2],newExons[3]
+
+ up_off = int(math.fabs(e1_end - exons[0,1]))
+ down_off = int(math.fabs(e2_begin - exons[1,0]))
+
+ return up_off,down_off
if __name__ == '__main__':
qpalma = QPalma()
//printf("exon %d %d inton til %d pos %d\n",prev_exon_start,prev_exon_stop,cur_exon_start,pos);
if (cur_exon_start - prev_exon_stop < min_overlap || cur_exon_start < pos ) { // go to next exon
- exon_idx++;
-
if (ue != 0 && dov != 0)
- combine_info(prev_exon_stop,cur_exon_start,upstream_end,ue,downstream_overlap,dov,out_fs,gene_id,exon_idx-1);
+ combine_info(prev_exon_stop,cur_exon_start,upstream_end,ue,downstream_overlap,dov,out_fs,gene_id,exon_idx);
if (uo != 0 && ds != 0)
- combine_info(prev_exon_stop,cur_exon_start,upstream_overlap,uo,downstream_start,ds,out_fs,gene_id,exon_idx-1);
+ combine_info(prev_exon_stop,cur_exon_start,upstream_overlap,uo,downstream_start,ds,out_fs,gene_id,exon_idx);
+ exon_idx++;
ue = uo = ds = dov = 0;
goto exon_label;
}
from shogun.Features import StringCharFeatures,DNA
from shogun.Kernel import WeightedDegreeStringKernel
+import cPickle
+
+from qpalma.DataProc import *
+from qpalma.TrainingParam import Param
+
class splicesites():
"""splice site scores (both donor and acceptor)"""
def example():
"""a test example"""
- import cPickle
-
- from paths_load_data_solexa import paths_load_data_solexa
- from TrainingParam import Param
filenames = {'data_dir':'/fml/ag-raetsch/share/projects/palma/param_files/',\
'param':'arabidopsis_ath1_ss=1_il=1.param.bz2',
ss.load_model(filenames, parameters)
ARGS = Param()
- ginfo_filename = 'genome_info.pickle'
+ ginfo_filename = '/fml/ag-raetsch/home/fabio/svn/projects/QPalma/qpalma/genome_info.pickle'
genome_info = cPickle.load(open(ginfo_filename))
- (Sequences, Acceptors, Donors, Exons, Ests, Qualities) \
+ (Sequences, Acceptors, Donors, Exons, Ests, Qualities, SplitPos) \
= paths_load_data_solexa('training',genome_info,ARGS)
dna = dict((str(ix),Sequences[ix]) for ix in xrange(len(Sequences)))