+ pipeline parse is much faster now,
author fabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Fri, 18 Apr 2008 14:37:32 +0000 (14:37 +0000)
committer fabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Fri, 18 Apr 2008 14:37:32 +0000 (14:37 +0000)
- convert the other parsers too

git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@8686 e1793c9e-67f9-0310-80fc-b846ff1f7b36

qpalma/parsers.py

index 62bc029..7bed1f2 100644 (file)
@@ -247,12 +247,6 @@ class PipelineReadParser(ReadParser):
       chr   = int(chr)
       pos   = int(pos)
 
-      if strand == 'D':
-         strand = '+'
-
-      if strand == 'P':
-         strand = '-'
-
       mismatches  = int(mismatches)
       length      = int(length)
       offset      = int(offset)
@@ -268,28 +262,38 @@ class PipelineReadParser(ReadParser):
       line_d = {'id':id, 'chr':chr, 'pos':pos, 'strand':strand,\
       'mismatches':mismatches, 'length':length, 'offset':offset,\
       'seq':seq,'prb':prb,'cal_prb':cal_prb,'chastity':chastity}
-      #'orig_seq':orig_seq,'is_spliced':is_spliced}
 
       return line_d
 
-   def next(self):
-      for line in self.fh:
-         line = line.strip()
-         yield self.parseLine(line)
-
-      raise StopIteration
 
    def parse(self):
-      entries = {}
+      entries = []
 
-      for elem in self.fh:
-         line_d = self.parseLine(elem)
-         id = line_d['id']
-         try:
-            entries[id] = [line_d]
-         except:
-            old_entry = entries[id]
-            old_entry.append(line_d)
-            entries[id] = old_entry
+      all_lines = self.fh.read()
+      all_lines = all_lines.split('\n')
+
+      for parsed_line in all_lines:
+         if parsed_line == '':
+            continue
+
+      #for parsed_line in self.fh:
+         id,chr,pos,strand,mismatches,length,offset,seq,prb,cal_prb,chastity =\
+         parsed_line.split()
+         chr   = int(chr)
+         pos   = int(pos)
+
+         mismatches  = int(mismatches)
+         length      = int(length)
+         offset      = int(offset)
+
+         line_d = {'id':id, 'chr':chr, 'pos':pos, 'strand':strand,\
+         'mismatches':mismatches, 'length':length, 'offset':offset,\
+         'seq':seq,'prb':prb,'cal_prb':cal_prb,'chastity':chastity}
+
+         #line_d = {'id':id, 'chr':chr, 'pos':pos, 'strand':strand,\
+         #'mismatches':mismatches, 'length':length, 'offset':offset,\
+         #'seq':seq,'prb':prb,'cal_prb':cal_prb,'chastity':chastity}
+
+         entries.append((line_d,parsed_line))
 
       return entries