+ parser now works with mmap and sscanf
[qpalma.git] / tools / data_tools / parseGff.py
index 5329b8e..a721633 100644 (file)
@@ -6,6 +6,7 @@ import os.path
 import csv
 from PyGff import *
 import cPickle
+import copy
 
 def parse(gff_fid,pickle_fid):
    reader = csv.reader(gff_fid, delimiter='\t', quoting=csv.QUOTE_NONE)
@@ -27,7 +28,6 @@ def parse(gff_fid,pickle_fid):
       if id == 'gene':
          if currentGene != None:
             allGenes.append(currentGene)
-            currentGene = None
          
          currentGene = Gene(chr,start,stop,strand)
 
@@ -41,6 +41,7 @@ def parse(gff_fid,pickle_fid):
          pass
 
       elif id == 'exon':
+         assert currentGene != None
          currentGene.addExon(start,stop)
 
       elif id == 'CDS':
@@ -55,13 +56,16 @@ def parse(gff_fid,pickle_fid):
       elif id == 'pseudogenic_transcript':
          pass
 
-      elif id == 'snoRNA':
+      elif id == 'miRNA':
          pass
 
-      elif id == 'snRNA':
+      elif id == 'rRNA':
          pass
 
-      elif id == 'miRNA':
+      elif id == 'snoRNA':
+         pass
+
+      elif id == 'snRNA':
          pass
 
       elif id == 'tRNA':
@@ -71,12 +75,13 @@ def parse(gff_fid,pickle_fid):
          if currentGene != None:
             allGenes.append(currentGene)
             currentGene = None
-
       else:
          assert False, 'Error: Unknown identifier \'%s\'' % id
 
-   cPickle.dump(allGenes,pickle_fid)
+   if currentGene != None:
+      allGenes.append(currentGene)
 
+   cPickle.dump(allGenes,pickle_fid)
       
 if __name__ == '__main__':
    assert len(sys.argv) >= 3