+ added new format for QPalma alignment output
[qpalma.git] / scripts / debugDataset.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from qpalma.DataProc import *
5 from compile_dataset import getSpliceScores, get_seq_and_scores
6
def debugDataset():
    """Load a slice of a remapped dataset and inspect one example in pdb.

    The dataset 'dataset_remapped_test_new' is read with ``paths_load_data``
    and every returned list is cut down to the entries ``[0:5000]``.  For
    the single example index 4579 the sequence and the two score lists are
    fetched with ``get_seq_and_scores``, each alternative alignment of that
    example is printed, and finally the interactive debugger is started so
    the loaded values can be examined by hand.

    The function takes no arguments and returns ``None``.  It returns early
    (before the debugger starts) as soon as an alternative alignment has a
    chromosome outside 1..5.
    """
    # Local import: the only visible source of ``pdb`` in this file would be
    # the star import from qpalma.DataProc, which is not a reliable provider.
    import pdb

    filename = 'dataset_remapped_test_new'

    (SeqInfo, Exons, OriginalEsts, Qualities,
     AlternativeSequences) = paths_load_data(filename, None, None, None)

    # Restrict all parallel lists to the same window of examples.
    beg = 0
    end = 5000

    SeqInfo = SeqInfo[beg:end]
    Exons = Exons[beg:end]
    OriginalEsts = OriginalEsts[beg:end]
    Qualities = Qualities[beg:end]
    AlternativeSequences = AlternativeSequences[beg:end]

    # Loop-invariant location of the genome flat files.
    dna_flat_files = '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome/'

    # Only one example (index 4579) is inspected.
    for exampleIdx in range(4579, 4580):
        currentSeqInfo = SeqInfo[exampleIdx]
        chrom, strand, up_cut, down_cut = currentSeqInfo

        # The results are not used below; they are kept as locals so they
        # can be looked at from the debugger prompt.
        dna, acc_supp, don_supp = get_seq_and_scores(
            chrom, strand, up_cut, down_cut, dna_flat_files)

        currentAlternatives = AlternativeSequences[exampleIdx]
        for alternative_alignment in currentAlternatives:
            # Separate names so the example's own chromosome/strand are not
            # overwritten by the alternative's values.
            (alt_chrom, alt_strand, genomicSeq_start, genomicSeq_stop,
             currentLabel) = alternative_alignment

            # NOTE(review): this leaves the whole function, so the debugger
            # below is never reached for such an example -- confirm that
            # 'continue' was not intended.
            if alt_chrom not in range(1, 6):
                return

            # Single-argument form: same space-separated output as the old
            # print statement, and it parses on Python 2 and Python 3.
            print('%s %s %s %s %s' % (alt_chrom, alt_strand, genomicSeq_start,
                                      genomicSeq_stop, currentLabel))
            #currentDNASeq, currentAcc, currentDon = get_seq_and_scores(chr,strand,genomicSeq_start,genomicSeq_stop,run['dna_flat_files'])

        # NOTE(review): the original indentation was lost; the breakpoint is
        # assumed to sit after the loop over the alternatives -- confirm.
        pdb.set_trace()
39
# Script entry point: start the debugging session only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    debugDataset()
42