+ fixed other bug (missing n last entries of the acceptor scores)
authorfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Wed, 4 Jun 2008 14:30:17 +0000 (14:30 +0000)
committerfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Wed, 4 Jun 2008 14:30:17 +0000 (14:30 +0000)
git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@9389 e1793c9e-67f9-0310-80fc-b846ff1f7b36

qpalma/sequence_utils.py

index aac593d..d6864a8 100644 (file)
@@ -166,6 +166,8 @@ def get_seq_and_scores(chr,strand,genomicSeq_start,genomicSeq_stop,dna_flat_file
       assert gt_tuple_pos == [p for p,e in enumerate(currentDon) if e != -inf and p > 0], pdb.set_trace()
       assert len(currentAcc) == len(currentDon)
 
+      pdb.set_trace()
+
       return genomicSeq, currentAcc, currentDon
 
    # build reverse complement if on negative strand
@@ -173,7 +175,6 @@ def get_seq_and_scores(chr,strand,genomicSeq_start,genomicSeq_stop,dna_flat_file
       fn = 'chr%d.dna.flat' % chr
       filename = os.path.join(dna_flat_files,fn)
       cmd =  'wc -c %s | cut -f1 -d \' \'' % filename
-      #print cmd
       import subprocess
       obj = subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
       out,err = obj.communicate()
@@ -182,23 +183,19 @@ def get_seq_and_scores(chr,strand,genomicSeq_start,genomicSeq_stop,dna_flat_file
          print 'Error occurred while trying to obtain file size'
       end = int(out)
 
-      #print 'size is %d' % end
-
-      intervalBegin = genomicSeq_start-100
-      intervalEnd    = genomicSeq_stop+100
-
-      #print 'before getSpliceScores'
-      #print intervalBegin ,intervalEnd
+      intervalBegin = genomicSeq_start#-100
+      intervalEnd    = genomicSeq_stop#+100
 
       total_size = end
       currentAcc, currentDon = getSpliceScores(chr,strand,intervalBegin,intervalEnd,total_size)
 
-      currentAcc = currentAcc[100:-98]
+      #currentAcc = currentAcc[100:-98]
       currentAcc = currentAcc[1:]
-      currentDon = currentDon[100:-100]
+      #currentDon = currentDon[100:-100]
 
       length = len(genomicSeq)
       currentAcc = currentAcc[:length]
+      currentAcc = currentAcc+[-inf]*(length-len(currentAcc))
 
       currentDon = currentDon+[-inf]*(length-len(currentDon))
 
@@ -207,6 +204,6 @@ def get_seq_and_scores(chr,strand,genomicSeq_start,genomicSeq_stop,dna_flat_file
 
       assert ag_tuple_pos == [p for p,e in enumerate(currentAcc) if e != -inf and p > 1], pdb.set_trace()
       assert gt_tuple_pos == [p for p,e in enumerate(currentDon) if e != -inf and p > 0], pdb.set_trace()
-      assert len(currentAcc) == len(currentDon)
+      assert len(currentAcc) == len(currentDon), pdb.set_trace()
 
       return genomicSeq, currentAcc, currentDon