+ Save changes in the dataset generation
authorfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Thu, 3 Jul 2008 10:52:59 +0000 (10:52 +0000)
committerfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Thu, 3 Jul 2008 10:52:59 +0000 (10:52 +0000)
git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@9853 e1793c9e-67f9-0310-80fc-b846ff1f7b36

tools/run_specific_scripts/transcriptome_analysis/README
tools/run_specific_scripts/transcriptome_analysis/combine_spliced_map_parts.sh
tools/run_specific_scripts/transcriptome_analysis/compare_predictions/.compare.c.swp [deleted file]
tools/run_specific_scripts/transcriptome_analysis/compile_dataset.py
tools/run_specific_scripts/transcriptome_analysis/createNewDataset.py

index 0a20713..2fd2254 100644 (file)
@@ -54,8 +54,9 @@ Old settings 1 mismatch:
 /media/oka_raid/backup/data/solexa_analysis/ATH/Transcriptome/Col-0/run_44/4/length_38/spliced
 
 
-
 2 Mismatches (stepwise trimming -3 from 35 to 20)
  
 /media/oka_raid/backup/data/solexa_analysis/ATH/Transcriptome/Col-0/run_44/4/length_38/spliced_3
 
+
+
index 5959484..07a7564 100755 (executable)
@@ -1,11 +1,16 @@
 #!/bin/bash
 
-touch map.vm.spliced
-rm map.vm.spliced
+result_dir=$1
 
-for((idx=0;idx<10;idx++))
+result_fn=$result_dir/map.vm.spliced
+
+touch $result_fn
+rm $result_fn
+
+for chunk in `ls -1 $result_dir/*.heuristic.spliced`
 do
-   cat map.vm.part_${idx}.heuristic.spliced >> map.vm.spliced
+   echo $chunk 
+   cat $chunk >> $result_fn
 done
 
 
diff --git a/tools/run_specific_scripts/transcriptome_analysis/compare_predictions/.compare.c.swp b/tools/run_specific_scripts/transcriptome_analysis/compare_predictions/.compare.c.swp
deleted file mode 100644 (file)
index e69de29..0000000
index ba68e47..89547df 100644 (file)
@@ -14,7 +14,7 @@ import qpalma.tools
 from qpalma.parsers import *
 from qpalma.sequence_utils import *
 
-import qpalma.Configuration as Conf
+import QPalmaConfiguration as Conf
 
 #
 # This script takes as input the map.vm map_2nd.vm files and generates QPalma
@@ -49,14 +49,14 @@ class DatasetGenerator:
 
    def __init__(self,map_file,map_2nd_file):
       assert os.path.exists(map_file), 'Error: Can not find map file'
+      print map_2nd_file
       assert os.path.exists(map_2nd_file), 'Error: Can not find map_2nd file'
       self.map_file = map_file
       self.map_2nd_file = map_2nd_file
 
       self.dataset = []
 
-      #self.read_size = 38
-      self.read_size = 36
+      self.read_size = Conf.read_size
 
 
    def saveAs(self,dataset_file):
@@ -170,7 +170,7 @@ class DatasetGenerator:
       # usually we have two files to parse:
       # the map file from the second run and a subset of the map file from the
       # first run
-      #dataset = self.parse_map_file(dataset,self.map_file,True)
+      dataset = self.parse_map_file(dataset,self.map_file,True)
       dataset = self.parse_map_file(dataset,self.map_2nd_file,False)
 
       self.dataset = dataset
index 42ed713..e32dde5 100644 (file)
@@ -3,23 +3,25 @@
 
 import os.path
 
-import qpalma.Configuration as Conf
 from compile_dataset import DatasetGenerator
 
 jp = os.path.join
 
 #working_dir='/fml/ag-raetsch/home/fabio/tmp/transcriptome_data'
 #working_dir='/fml/ag-raetsch/home/fabio/tmp/transcriptome_data'
+#working_dir='/fml/ag-raetsch/share/projects/qpalma/solexa/new_run2/mapping/spliced'
 
-working_dir='/fml/ag-raetsch/share/projects/qpalma/solexa/new_run2/mapping/spliced'
+main_dir = '/fml/ag-raetsch/home/fabio/tmp/transcriptome_data/run_0/'
 
+#spliced_dir = '/media/oka_raid/backup/data/solexa_analysis/ATH/Transcriptome/Col-0/run_44/4/length_38/spliced'
+#result_dir = '/fml/ag-raetsch/home/fabio/tmp/transcriptome_data/run_1/'
 
-result_dir='/fml/ag-raetsch/home/fabio/tmp/sandbox'
+spliced_dir = '/media/oka_raid/backup/data/solexa_analysis/ATH/Transcriptome/Col-0/run_44/4/length_38/spliced_3'
+result_dir = '/fml/ag-raetsch/home/fabio/tmp/transcriptome_data/run_2/'
 
-#map_1_fn = jp(working_dir,'map.vm.spliced')
-map_1_fn = jp(working_dir,'spliced.heuristic')
-map_2_fn = jp(working_dir,'map.vm')
+map_1_fn = jp(main_dir,'map.vm.spliced')
+map_2_fn = jp(spliced_dir,'map.vm')
 
 dg = DatasetGenerator(map_1_fn,map_2_fn)
 dg.compile_dataset()
-dg.saveAs(jp(result_dir,'dataset_neg_strand_testcase'))
+dg.saveAs(jp(result_dir,'dataset_run_2.pickle'))