delete[] matrices[i];
}
-void Alignment::getAlignmentResultsOld(int* s_align, int* e_align,
+void Alignment::getAlignmentResults(int* s_align, int* e_align,
int* mmatrix_p, double* alignscores, double* qScores) {
size_t idx;
//printf("Leaving getAlignmentResults...\n");
}
-void Alignment::getAlignmentArraysOld(int* dna_align, int* est_align) {
+void Alignment::getAlignmentArrays(int* dna_align, int* est_align) {
int idx;
for(idx=0; idx<result_len; idx++) {
}
-PyObject* Alignment::getAlignmentResults() {
+PyObject* Alignment::getAlignmentResultsNew() {
- PyObject* result = PyTuple_New(5);
PyObject* py_splice_align = PyList_New(splice_align_size);
PyObject* py_est_align = PyList_New(est_align_size);
PyObject* py_mmatrix = PyList_New(mmatrix_param_size);
//printf("\nctr is %d\n",ctr);
}
- PyTuple_SetItem(result,0,py_splice_align);
- PyTuple_SetItem(result,1,py_est_align);
- PyTuple_SetItem(result,2,py_mmatrix);
- PyTuple_SetItem(result,3,py_align_scores);
- PyTuple_SetItem(result,4,py_q_scores);
+ PyObject* result = PyTuple_Pack(5,
+ py_splice_align, py_est_align, py_mmatrix,
+ py_align_scores, py_q_scores);
+
+ //PyTuple_SetItem(result,0,py_splice_align);
+ //PyTuple_SetItem(result,1,py_est_align);
+ //PyTuple_SetItem(result,2,py_mmatrix);
+ //PyTuple_SetItem(result,3,py_align_scores);
+ //PyTuple_SetItem(result,4,py_q_scores);
return result;
}
-PyObject* Alignment::getAlignmentArrays() {
+PyObject* Alignment::getAlignmentArraysNew() {
- PyObject* result = PyTuple_New(2);
- PyObject* dna_align = PyList_New(result_len);
- PyObject* est_align = PyList_New(result_len);
+ PyObject* py_dna_align = PyList_New(result_len);
+ PyObject* py_est_align = PyList_New(result_len);
- int idx;
- for(idx=0; idx<result_len; idx++) {
- PyList_SetItem(dna_align,idx,PyInt_FromLong(DNA_ARRAY[idx]));
- PyList_SetItem(est_align,idx,PyInt_FromLong(EST_ARRAY[idx]));
+ for(size_t idx=0; idx<result_len; idx++) {
+ PyList_SetItem(py_dna_align,idx,PyInt_FromLong(DNA_ARRAY[idx]));
+ PyList_SetItem(py_est_align,idx,PyInt_FromLong(EST_ARRAY[idx]));
}
+ PyObject* result = PyTuple_Pack(2, py_dna_align, py_est_align);
return result;
}
from qpalma.TrainingParam import Param
from qpalma.Plif import Plf,compute_donacc
+# these two imports are needed for the load genomic resp. interval query
+# functions
+#from Genefinding import *
+#from genome_utils import load_genomic
from Utils import calc_stat, calc_info, pprint_alignment, get_alignment
class SpliceSiteException:
dna_flat_files = '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome/'
dna, acc_supp, don_supp = get_seq_and_scores(chr,strand,up_cut,down_cut,dna_flat_files)
+ # splice score is located at g of ag
+ #ag_tuple_pos = [p for p,e in enumerate(dna) if p>1 and dna[p-1]=='a' and dna[p]=='g' ]
+ #assert ag_tuple_pos == [p for p,e in enumerate(acc_supp) if e != -inf and p > 1], pdb.set_trace()
+ #gt_tuple_pos = [p for p,e in enumerate(dna) if p>0 and p<len(dna)-1 and e=='g' and (dna[p+1]=='t' or dna[p+1]=='c')]
+ #assert gt_tuple_pos == [p for p,e in enumerate(don_supp) if e != -inf and p > 0], pdb.set_trace()
+
original_exons = currentExons
exons = original_exons - (up_cut-1)
exons[0,0] -= 1
c_dna_array = QPalmaDP.createIntArrayFromList([0]*(result_len))
c_est_array = QPalmaDP.createIntArrayFromList([0]*(result_len))
- currentAlignment.getAlignmentArraysOld(c_dna_array,c_est_array)
- __dna_array,__est_array = currentAlignment.getAlignmentArrays()
+ currentAlignment.getAlignmentArrays(c_dna_array,c_est_array)
+ _dna_array,_est_array = currentAlignment.getAlignmentArraysNew()
dna_array = [0.0]*result_len
est_array = [0.0]*result_len
dna_array = None
est_array = None
- assert dna_array == __dna_array
- assert est_array == __est_array
+ assert dna_array == _dna_array
+ assert est_array == _est_array
- currentAlignment.getAlignmentResultsOld(c_SpliceAlign, c_EstAlign,\
+ currentAlignment.getAlignmentResults(c_SpliceAlign, c_EstAlign,\
c_WeightMatch, c_DPScores, c_qualityPlifsFeatures)
- __newSpliceAlign, __newEstAlign, __newWeightMatch, __newDPScores, __newQualityPlifsFeatures =\
- currentAlignment.getAlignmentResults()
+ _SpliceAlign, _EstAlign, _WeightMatch, _DPScores, _newQualityPlifsFeatures =\
+ currentAlignment.getAlignmentResultsNew()
newSpliceAlign = zeros((current_num_path*dna_len,1))
newEstAlign = zeros((est_len*current_num_path,1))
del c_qualityPlifsFeatures
del currentAlignment
- assert newSpliceAlign == __newSpliceAlign
- assert newEstAlign == __newEstAlign
- assert newWeightMatch == __newWeightMatch
- assert newDPScores == __newDPScores
- assert newQualityPlifsFeatures == __newQualityPlifsFeatures
+
+ assert newSpliceAlign.flatten().tolist()[0] == _SpliceAlign
+ assert newEstAlign.flatten().tolist()[0] == _EstAlign
+ assert newWeightMatch.flatten().tolist()[0] == _WeightMatch
+ assert newDPScores.flatten().tolist()[0] == _DPScores
+ assert newQualityPlifsFeatures.flatten().tolist()[0] == _newQualityPlifsFeatures
+
+ pdb.set_trace()
return newSpliceAlign, newEstAlign, newWeightMatch, newDPScores,\
newQualityPlifsFeatures, dna_array, est_array
#
###############################################################################
- def init_prediction(self,run,dataset_fn,prediction_keys,param_fn,seqInfo,set_name):
+ def init_prediction(self,run,dataset_fn,prediction_keys,param_fn,seqInfo,set_name,):
"""
Performing a prediction takes...
"""
self.run = run
self.set_name = set_name
+ #full_working_path = os.path.join(run['alignment_dir'],run['name'])
full_working_path = run['result_dir']
print 'full_working_path is %s' % full_working_path
self.logfh = open('_qpalma_predict_%s.log'%set_name,'w+')
+
# number of prediction instances
self.plog('Number of prediction examples: %d\n'% len(prediction_keys))
def predict(self,run,prediction_set,param,seqInfo):
"""
- This method contains the prediction loop over all reads of the given dataset.
+ This method...
"""
self.run = run
- self.read_size = 38
- self.use_quality_scores = False
- if self.run['mode'] == 'using_quality_scores':
- use_quality_scores = True
+ if self.run['mode'] == 'normal':
+ self.use_quality_scores = False
+
+ elif self.run['mode'] == 'using_quality_scores':
+ self.use_quality_scores = True
+ else:
+ assert(False)
# Set the parameters such as limits/penalties for the Plifs
[h,d,a,mmatrix,qualityPlifs] =\
#totalQualSP = self.run['totalQualSuppPoints']
#totalQualityPenalties = zeros((totalQualSP,1))
- problem_ctr = 0
+ self.problem_ctr = 0
# where we store the predictions
allPredictions = []
# we take the first quality vector of the tuple of quality vectors
quality_index = 0
+ # fetch the data needed
+ #g_dir = run['dna_flat_files'] #'/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome/'
+ #acc_dir = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/acc'
+ #don_dir = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/don'
+
+ #g_fmt = 'chr%d.dna.flat'
+ #s_fmt = 'contig_%d%s'
+
+ #num_chromo = 6
+
+ #accessWrapper = DataAccessWrapper(g_dir,acc_dir,don_dir,g_fmt,s_fmt)
+ #seqInfo = SeqSpliceInfo(accessWrapper,range(1,num_chromo))
+
# beginning of the prediction loop
for example_key in prediction_set.keys():
+ print 'Current example %d' % example_key
+
for example in prediction_set[example_key]:
currentSeqInfo,read,currentQualities = example
if not self.qpalma_debug_mode:
self.plog('Loading example id: %d...\n'% int(id))
- if use_quality_scores:
+ if run['mode'] == 'normal':
+ quality = [40]*len(read)
+
+ if run['mode'] == 'using_quality_scores':
quality = currentQualities[quality_index]
- else:
+
+ if not run['enable_quality_scores']:
quality = [40]*len(read)
try:
currentDNASeq, currentAcc, currentDon = seqInfo.get_seq_and_scores(chromo,strand,genomicSeq_start,genomicSeq_stop)
except:
- problem_ctr += 1
+ self.problem_ctr += 1
continue
if not run['enable_splice_signals']:
- for idx in range(len(currentDon)):
- if currentDon[idx] != -inf:
+ for idx,elem in enumerate(currentDon):
+ if elem != -inf:
currentDon[idx] = 0.0
- if currentAcc[idx] != -inf:
+ for idx,elem in enumerate(currentAcc):
+ if elem != -inf:
currentAcc[idx] = 0.0
current_prediction = self.calc_alignment(currentDNASeq, read,\
allPredictions.append(current_prediction)
if not self.qpalma_debug_mode:
- self.finalizePrediction(allPredictions,problem_ctr)
+ self.finalizePrediction(allPredictions)
else:
return allPredictions
- def finalizePrediction(self,allPredictions,problem_ctr):
+ def finalizePrediction(self,allPredictions):
# end of the prediction loop we save all predictions in a pickle file and exit
cPickle.dump(allPredictions,open('%s.predictions.pickle'%(self.set_name),'w+'))
print 'Prediction completed'
self.plog('Prediction completed\n')
- mes = 'Problem ctr %d' % problem_ctr
+ mes = 'Problem ctr %d' % self.problem_ctr
print mes
self.plog(mes+'\n')
self.logfh.close()
if '-' in read:
self.plog('found gap\n')
read = read.replace('-','')
- assert len(read) == self.read_size
+ assert len(read) == Conf.read_size
dna_len = len(dna)
read_len = len(read)
true_map[0] = 1
pathNr = 0
+ #pdb.set_trace()
+
_newSpliceAlign = array.array('B',newSpliceAlign.flatten().tolist()[0])
_newEstAlign = array.array('B',newEstAlign.flatten().tolist()[0])