2 # -*- coding: utf-8 -*-
# parse: read a tab-delimited annotation stream and pickle the genes found.
#
# gff_fid    -- handed to csv.reader with a tab delimiter and csv.QUOTE_NONE,
#               so quote characters are treated as ordinary field text.
# pickle_fid -- destination passed to cPickle.dump for the allGenes list.
#
# Records are dispatched by comparing an identifier string (`id`) against
# known feature names. An `assert False` carrying an 'Unknown identifier'
# message follows those checks; presumably it is the fallback for
# unrecognized record types -- confirm.
# NOTE(review): `id` and `chr` presumably are per-record locals that shadow
# the builtins of the same name -- confirm, and consider renaming.
11 def parse(gff_fid
,pickle_fid
):
12 reader
= csv
.reader(gff_fid
, delimiter
='\t', quoting
=csv
.QUOTE_NONE
)
25 if id == 'chromosome':
# A previously built gene (if any) is stored in allGenes before currentGene
# is rebound to a new Gene.
29 if currentGene
!= None:
30 allGenes
.append(currentGene
)
32 currentGene
= Gene(chr,start
,stop
,strand
)
34 elif id == 'five_prime_UTR':
37 elif id == 'three_prime_UTR':
# An exon can only be attached to an already-created gene.
44 assert currentGene
!= None
45 currentGene
.addExon(start
,stop
)
53 elif id == 'pseudogenic_exon':
56 elif id == 'pseudogenic_transcript':
74 elif id == 'pseudogene':
75 if currentGene
!= None:
76 allGenes
.append(currentGene
)
79 assert False, 'Error: Unknown identifier \'%s\'' % id
# Append the gene still held in currentGene, if any, before serializing.
81 if currentGene
!= None:
82 allGenes
.append(currentGene
)
# Serialize the collected genes to pickle_fid.
84 cPickle
.dump(allGenes
,pickle_fid
)
if __name__ == '__main__':
    # Command-line entry point:
    #     <script> <annotation_file> <pickle_file>
    # Reads the annotation file and writes the pickled gene list to a new
    # output file.
    #
    # Explicit checks are used instead of `assert` so that validation still
    # runs under `python -O` (where asserts are stripped) -- otherwise an
    # existing output file could be silently overwritten.
    if len(sys.argv) < 3:
        sys.exit('Usage: %s <annotation_file> <pickle_file>' % sys.argv[0])

    annotFile = sys.argv[1]
    pickleFile = sys.argv[2]

    if not os.path.exists(annotFile):
        sys.exit('Error: annotation file \'%s\' does not exist' % annotFile)

    # Refuse to clobber an existing output file.
    if os.path.exists(pickleFile):
        sys.exit('Error: output file \'%s\' already exists' % pickleFile)

    # Context managers guarantee both handles are closed (and the pickle is
    # flushed to disk) even if parse() raises. Nested `with` statements keep
    # this compatible with older Python 2 releases.
    with open(annotFile) as gff_fid:
        with open(pickleFile, 'w+') as pickle_fid:
            parse(gff_fid, pickle_fid)