/media/oka_raid/backup/data/solexa_analysis/ATH/Transcriptome/Col-0/run_44/4/length_38/spliced
-
2 mismatches (stepwise trimming in steps of -3, from 35 down to 20 nt)
/media/oka_raid/backup/data/solexa_analysis/ATH/Transcriptome/Col-0/run_44/4/length_38/spliced_3
+
+
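The trimming itself is presumably done by the read mapper; as a small sketch of the read lengths such a schedule walks through (step and bounds taken from the note above, the variable name is made up):

# candidate read lengths for stepwise trimming: -3 from 35 down to 20
trim_lengths = range(35, 20 - 1, -3)   # -> [35, 32, 29, 26, 23, 20]
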
#!/bin/bash
-touch map.vm.spliced
-rm map.vm.spliced
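+# Collect the per-chunk heuristic spliced hits into one combined spliced map.
+# Usage (the script name is an assumption): collect_spliced.sh <result_dir>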
+result_dir=$1
-for((idx=0;idx<10;idx++))
+result_fn=$result_dir/map.vm.spliced
+
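+# remove any stale result file; touch first so rm never fails if it is missing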
+touch $result_fn
+rm $result_fn
+
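+# append every per-chunk *.heuristic.spliced file to the combined result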
+for chunk in `ls -1 $result_dir/*.heuristic.spliced`
do
- cat map.vm.part_${idx}.heuristic.spliced >> map.vm.spliced
+ echo $chunk
+ cat $chunk >> $result_fn
done
from qpalma.parsers import *
from qpalma.sequence_utils import *
-import qpalma.Configuration as Conf
+import QPalmaConfiguration as Conf
#
# This script takes as input the map.vm and map_2nd.vm files and generates a QPalma dataset.
def __init__(self,map_file,map_2nd_file):
assert os.path.exists(map_file), 'Error: Can not find map file'
+ print map_2nd_file
assert os.path.exists(map_2nd_file), 'Error: Can not find map_2nd file'
self.map_file = map_file
self.map_2nd_file = map_2nd_file
self.dataset = []
- #self.read_size = 38
- self.read_size = 36
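+ # read size now comes from the central configuration instead of being hard-coded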
+ self.read_size = Conf.read_size
def saveAs(self,dataset_file):
# usually we have two files to parse:
# the map file from the second run and a subset of the map file from the
# first run
- #dataset = self.parse_map_file(dataset,self.map_file,True)
+ dataset = self.parse_map_file(dataset,self.map_file,True)
dataset = self.parse_map_file(dataset,self.map_2nd_file,False)
self.dataset = dataset
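
Only the attribute Conf.read_size is visible in this excerpt; a minimal sketch of the corresponding setting in QPalmaConfiguration, assuming a plain module-level constant (the value 38 is a guess based on the length_38 run):

# QPalmaConfiguration.py -- sketch of the single setting used above
read_size = 38   # read length in nt; assumed, not shown in the diff
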
import os.path
-import qpalma.Configuration as Conf
from compile_dataset import DatasetGenerator
jp = os.path.join
#working_dir='/fml/ag-raetsch/home/fabio/tmp/transcriptome_data'
+#working_dir='/fml/ag-raetsch/share/projects/qpalma/solexa/new_run2/mapping/spliced'
-working_dir='/fml/ag-raetsch/share/projects/qpalma/solexa/new_run2/mapping/spliced'
+main_dir = '/fml/ag-raetsch/home/fabio/tmp/transcriptome_data/run_0/'
+#spliced_dir = '/media/oka_raid/backup/data/solexa_analysis/ATH/Transcriptome/Col-0/run_44/4/length_38/spliced'
+#result_dir = '/fml/ag-raetsch/home/fabio/tmp/transcriptome_data/run_1/'
-result_dir='/fml/ag-raetsch/home/fabio/tmp/sandbox'
+spliced_dir = '/media/oka_raid/backup/data/solexa_analysis/ATH/Transcriptome/Col-0/run_44/4/length_38/spliced_3'
+result_dir = '/fml/ag-raetsch/home/fabio/tmp/transcriptome_data/run_2/'
-#map_1_fn = jp(working_dir,'map.vm.spliced')
-map_1_fn = jp(working_dir,'spliced.heuristic')
-map_2_fn = jp(working_dir,'map.vm')
+map_1_fn = jp(main_dir,'map.vm.spliced')
+map_2_fn = jp(spliced_dir,'map.vm')
dg = DatasetGenerator(map_1_fn,map_2_fn)
dg.compile_dataset()
-dg.saveAs(jp(result_dir,'dataset_neg_strand_testcase'))
+dg.saveAs(jp(result_dir,'dataset_run_2.pickle'))
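
Assuming saveAs writes an ordinary Python pickle under exactly the path it is given (the file name suggests this, but the implementation is not part of the excerpt), the compiled dataset could later be reloaded along these lines:

import cPickle

# hypothetical reload of the dataset written by dg.saveAs(...) above
dataset_fn = '/fml/ag-raetsch/home/fabio/tmp/transcriptome_data/run_2/dataset_run_2.pickle'
dataset = cPickle.load(open(dataset_fn,'rb'))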