cd6573167c4caa293c7da8d5ef16decd4633529c
[qpalma.git] / tools / dataset_scripts / countNotOnChr1-5_pos.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import sys
5 import qparser
6
# Read ids below this value are counted as spliced reads; ids at or above it
# are counted as unspliced.
# NOTE(review): presumably this mirrors the id scheme of the read generator --
# confirm against the code that produces the ids.
_SPLICED_ID_LIMIT = 1000000300000

# Strand codes found in the input mapped to the symbols used internally;
# any other value is kept unchanged.
_STRAND_SYMBOLS = {'D': '+', 'P': '-'}


def _is_correct(strand, chromo):
    """Return True for a hit on the '+' strand of chromosome 1, 2, 3, 4 or 5."""
    return strand == '+' and 1 <= chromo <= 5


def check(filename):
    """Print how many reads have a hit on the '+' strand of chromosomes 1-5.

    Each non-blank line of *filename* is split on whitespace. Column 0 is
    the integer read id, column 1 the integer chromosome and column 3 the
    strand code ('D' is mapped to '+', 'P' to '-'). Column 2 is not used.

    One hit is kept per read id: once a read has a "correct" hit ('+' strand
    on chromosome 1-5) it is never replaced; otherwise the most recent hit
    for that id wins.

    Five lines are written to stdout: the number of distinct reads, how many
    of them are spliced (id below the spliced-id limit), how many are
    correct, and the correct count broken down into spliced and unspliced.

    Raises:
        IOError/OSError: if *filename* cannot be opened.
        IndexError: if a non-blank line has fewer than four columns.
        ValueError: if the id or chromosome column is not an integer.
    """
    # read id -> (strand, chromosome) of the hit retained for that read.
    best_hit = {}

    # 'with' guarantees the file is closed even if a line fails to parse.
    with open(filename) as handle:
        for line in handle:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue

            read_id = int(fields[0])
            chromo = int(fields[1])
            strand = _STRAND_SYMBOLS.get(fields[3], fields[3])

            previous = best_hit.get(read_id)
            if previous is not None and _is_correct(*previous):
                # A correct hit is already stored for this read; keep it.
                continue
            best_hit[read_id] = (strand, chromo)

    total_ctr = len(best_hit)
    spliced_ctr = 0
    correct_ctr = 0
    correct_spliced = 0
    correct_unspliced = 0

    for read_id, (strand, chromo) in best_hit.items():
        is_spliced = read_id < _SPLICED_ID_LIMIT
        if is_spliced:
            spliced_ctr += 1
        if _is_correct(strand, chromo):
            correct_ctr += 1
            if is_spliced:
                correct_spliced += 1
            else:
                correct_unspliced += 1

    # Single-argument print(...) produces identical output on Python 2 and 3.
    print('total ctr %d' % total_ctr)
    print('spliced_ctr %d ' % spliced_ctr)

    print('total correct %d' % correct_ctr)
    print('spliced %d ' % correct_spliced)
    print('unspliced %d ' % correct_unspliced)
58
59
if __name__ == '__main__':
    # The script needs the path of the file to analyse as its first
    # argument; fail with a usage line instead of a bare IndexError.
    # Any further arguments are ignored.
    if len(sys.argv) < 2:
        sys.stderr.write('usage: %s <filename>\n' % sys.argv[0])
        sys.exit(1)
    check(sys.argv[1])