03c9d0644ad72d6dab6b2ba2b44d0dba47c1e146
[qpalma.git] / tools / data_tools / createTestSet.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import sys
5 import os.path
6 import csv
7 from PyGff import *
8 import cPickle
9
10 info=\
11 """
12 You have to supply two files. One containing the gff information and the other
13 one containing the information of the Solexa(R) reads.
14
15 Usage: ./createTestSet.py gff.pickle reads.pickle
16 """
17
18 doc=\
19 """
20 Make sure that you split the data into chromosome files beforehand because
21 this method does not check the chromosome info
22 """
23
def check(annot, reads):
    """Print the number of entries in the annotation and in the reads.

    Writes ``len(annot)`` followed by ``len(reads)`` to standard output,
    one value per line. Both arguments only need to support ``len()``.
    Returns None.
    """
    # Parenthesised single-argument form: identical output under the
    # Python 2 print statement, and valid syntax under Python 3.
    print(len(annot))
    print(len(reads))
27
28
29
30
if __name__ == '__main__':
    # Script entry point: load the pickled annotation and the pickled reads
    # named on the command line and report their sizes via check().

    # Validate explicitly rather than with assert: assert statements are
    # removed under `python -O`, which would let a bad invocation through.
    # sys.exit(<str>) writes the message to stderr and exits with status 1.
    if len(sys.argv) < 3:
        sys.exit(info)
    annotFile = sys.argv[1]
    readsFile = sys.argv[2]
    for path in (annotFile, readsFile):
        if not os.path.exists(path):
            sys.exit('File %s does not exist!' % path)
    print(doc)

    # NOTE(review): unpickling can execute arbitrary code; only run this on
    # pickle files that come from a trusted source.
    # Binary mode is the documented safe way to read pickles, and the
    # with-blocks close both handles even if loading raises.
    with open(annotFile, 'rb') as annotHandle:
        annotation = cPickle.load(annotHandle)
    with open(readsFile, 'rb') as readsHandle:
        reads = cPickle.load(readsHandle)

    check(annotation, reads)
43