+ some bugfixes
[qpalma.git] / tests / test_sequence_utils.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import pdb
5 import numpy
6 import qpalma.sequence_utils
7
8 #flat_files = '/fml/ag-raetsch/home/fabio/svn/projects/QPalma/tests/test_data'
9
10 #begin = 0
11 #end = 60
12 #dna,acc,don = qpalma.sequence_utils.get_seq_and_scores(1,'+',begin,end,flat_files,True)
13 #dna = qpalma.sequence_utils.get_seq_and_scores(1,'+',begin,end,flat_files,True)
14 #print dna
15
16 def check_positions(dna,acc,don,offset=0):
17 first_gt_tuple_pos = [p for p,e in enumerate(dna) if p>0 and p<len(dna)-1 and e=='g' and (dna[p+1]=='t' or dna[p+1]=='c')][:offset]
18 first_ag_tuple_pos = [p for p,e in enumerate(dna) if p>1 and p<len(dna) and dna[p-1]=='a' and dna[p]=='g'][:offset]
19
20 first_acc_scores = [p for p,e in enumerate(acc) if e != -numpy.inf][:offset]
21 first_don_scores = [p for p,e in enumerate(don) if e != -numpy.inf][:offset]
22
23 last_gt_tuple_pos = [p for p,e in enumerate(dna) if p>0 and p<len(dna)-1 and e=='g' and (dna[p+1]=='t' or dna[p+1]=='c')][-offset:]
24 last_ag_tuple_pos = [p for p,e in enumerate(dna) if p>1 and p<len(dna) and dna[p-1]=='a' and dna[p]=='g'][-offset:]
25
26 last_acc_scores = [p for p,e in enumerate(acc) if e != -numpy.inf][-offset:]
27 last_don_scores = [p for p,e in enumerate(don) if e != -numpy.inf][-offset:]
28
29 assert first_gt_tuple_pos == first_don_scores
30 assert first_ag_tuple_pos == first_acc_scores
31
32 assert last_gt_tuple_pos == last_don_scores
33 assert last_ag_tuple_pos == last_acc_scores
34
35
36 def run():
37 flat_files = '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome/'
38 chromo = 1
39 strand = '+'
40 begin = 0
41 end = 100000000
42 dna,acc,don = qpalma.sequence_utils.get_seq_and_scores(chromo,strand,begin,end,flat_files)
43
44 check_positions(dna,acc,don,100)
45
46
47 def run2():
48 flat_files = '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome/'
49 chromo = 8
50 strand = '-'
51 begin = 0
52 end = 100000000
53 dna,acc,don = qpalma.sequence_utils.get_seq_and_scores(chromo,strand,begin,end,flat_files)
54
55 check_positions(dna,acc,don,100)
56
57 pdb.set_trace()
58
59
60 def run3():
61 flat_files = '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome/'
62
63 begin = 0
64 end = 100000000
65 full_pos_dna = qpalma.sequence_utils.get_seq_and_scores(1,'+',begin,end,flat_files,True)
66
67 begin = 0
68 end = 500
69 dna = qpalma.sequence_utils.get_seq_and_scores(8,'+',begin,end,flat_files,True)
70
71 pos_dna = full_pos_dna[-500:]
72 r_dna = qpalma.sequence_utils.reverse_complement(dna)
73 pos_dna == r_dna
74 pdb.set_trace()
75
76 def run4():
77 flat_files = '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome/'
78 chromo = 8
79 strand = '-'
80 begin = 200
81 end = 1200
82 dna,acc,don = qpalma.sequence_utils.get_seq_and_scores(chromo,strand,begin,end,flat_files)
83
84 check_positions(dna,acc,don)
85
86 pdb.set_trace()
87
88
89 def check_negative_strand(b,e):
90 flat_files = '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome/'
91 chromo = 8
92 strand = '-'
93 begin = b
94 end = e
95 dna,acc,don = qpalma.sequence_utils.get_seq_and_scores(chromo,strand,begin,end,flat_files)
96
97 pdb.set_trace()
98
99 check_positions(dna,acc,don)
100
101 print 'fine'
102
103 def check_positive_strand(b,e):
104 flat_files = '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome/'
105 chromo = 1
106 strand = '+'
107 begin = b
108 end = e
109 dna,acc,don = qpalma.sequence_utils.get_seq_and_scores(chromo,strand,begin,end,flat_files)
110
111 check_positions(dna,acc,don)
112 print 'fine'
113
114 if __name__ == '__main__':
115 #run()
116 #run2()
117 #run3(
118 run4()