+ fixed some bugs in the negative strand lookup table
[qpalma.git] / qpalma / sequence_utils.py
index 5e00cee..46435e1 100644 (file)
@@ -6,12 +6,22 @@ import pdb
 import random
 import re
 import sys
 import random
 import re
 import sys
+import subprocess
 
 from numpy.matlib import inf
 
 from Genefinding import *
 from genome_utils import load_genomic
 
 
 from numpy.matlib import inf
 
 from Genefinding import *
 from genome_utils import load_genomic
 
+def get_flatfile_size(filename):
+   """
+   Return the size in bytes of the flat file ``filename``.
+
+   The size is taken from the file system with os.path.getsize rather than
+   by piping ``wc -c`` through ``cut`` in a shell. The shell pipeline broke
+   on file names containing blanks or shell metacharacters (the name was
+   interpolated unquoted into the command line) and returned an empty
+   string on platforms where ``wc`` pads its output with leading blanks.
+
+   If the size cannot be determined (e.g. the file does not exist), the
+   error message is printed and the underlying OSError is re-raised, so
+   the caller never continues with a bogus size.
+   """
+   # function-scope import keeps this helper self-contained
+   import os
+
+   try:
+      return os.path.getsize(filename)
+   except OSError:
+      # same message as before, written so it works without the print statement
+      sys.stdout.write('Error occurred while trying to obtain file size\n')
+      raise
+
 
 def reverse_complement(seq):
    """
 
 def reverse_complement(seq):
    """
@@ -172,14 +182,7 @@ def get_seq_and_scores(chr,strand,genomicSeq_start,genomicSeq_stop,dna_flat_file
    if strand == '-':
       fn = 'chr%d.dna.flat' % chr
       filename = os.path.join(dna_flat_files,fn)
    if strand == '-':
       fn = 'chr%d.dna.flat' % chr
       filename = os.path.join(dna_flat_files,fn)
-      cmd =  'wc -c %s | cut -f1 -d \' \'' % filename
-      import subprocess
-      obj = subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
-      out,err = obj.communicate()
-      
-      if err != '':
-         print 'Error occurred while trying to obtain file size'
-      end = int(out)
+      end = get_flatfile_size(filename)
 
       intervalBegin = genomicSeq_start-100
       intervalEnd    = genomicSeq_stop+100
 
       intervalBegin = genomicSeq_start-100
       intervalEnd    = genomicSeq_stop+100