+ added a script that processes the single lane files to create a full dataset
authorfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Fri, 30 May 2008 09:00:10 +0000 (09:00 +0000)
committerfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Fri, 30 May 2008 09:00:10 +0000 (09:00 +0000)
git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@9297 e1793c9e-67f9-0310-80fc-b846ff1f7b36

tools/run_specific_scripts/transcriptome_analysis/README [new file with mode: 0644]
tools/run_specific_scripts/transcriptome_analysis/createFullMap.sh [new file with mode: 0755]

diff --git a/tools/run_specific_scripts/transcriptome_analysis/README b/tools/run_specific_scripts/transcriptome_analysis/README
new file mode 100644 (file)
index 0000000..55d4976
--- /dev/null
@@ -0,0 +1,13 @@
+Steps
+-----
+
+The data was taken from
+
+/media/oka_raid/backup/data/solexa_analysis/ATH/Transcriptome/Col-0/run_40/
+
+directories 1 to 3
+
+We combined the map.vm and map_2nd.vm files from all three of these directories.
+
+fabio@congo:/fml/ag-raetsch/share/projects/qpalma/solexa/scripte/first_transcriptome_run$ 
+
diff --git a/tools/run_specific_scripts/transcriptome_analysis/createFullMap.sh b/tools/run_specific_scripts/transcriptome_analysis/createFullMap.sh
new file mode 100755 (executable)
index 0000000..439c343
--- /dev/null
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# This script collects data from
+# /media/oka_raid/backup/data/solexa_analysis/ATH/Transcriptome/Col-0/run_40/
+# and combines the per-lane map.vm files of the "main" and "spliced" vmatch
+# rounds into a single map.vm and a single map_2nd.vm, respectively.
+#
+
+function combine_and_check_maps {
+   # specifies project dir
+   run_dir=$1
+   # specifies vmatch round (either main or spliced)
+   round_dir=$2
+   # this is the result file ( map.vm or map_2nd.vm)
+   new_map=$3
+
+   current_map=$run_dir/1/length_38/${round_dir}/map.vm
+   echo "processing $current_map ..."
+   cat  $current_map > $new_map
+   lane1_read_ctr=`cut -f1 $current_map | sort -u | wc -l`
+
+   current_map=$run_dir/2/length_38/${round_dir}/map.vm
+   echo "processing $current_map ..."
+   cat $current_map >> $new_map
+   lane2_read_ctr=`cut -f1 $current_map | sort -u | wc -l`
+
+   current_map=$run_dir/3/length_38/${round_dir}/map.vm
+   echo "processing $current_map ..."
+   cat $current_map >> $new_map
+   lane3_read_ctr=`cut -f1 $current_map | sort -u | wc -l`
+
+   combined_lanes_ctr=`echo "$lane1_read_ctr + $lane2_read_ctr + $lane3_read_ctr" | bc`
+   combined_read_ctr=`cut -f1 $new_map | sort -u | wc -l`
+
+   # here we check whether the number of reads of the combined file is the sum
+   # of the reads of the single lane files
+   if [ $combined_lanes_ctr -ne $combined_read_ctr ] ; then
+      echo "Error during combination of first vmatch run reads!"
+   fi
+}
+
+# root of the sequencing run whose lane directories 1 to 3 are combined
+run_dir=/media/oka_raid/backup/data/solexa_analysis/ATH/Transcriptome/Col-0/run_40
+
+# directory receiving the combined map files; created on demand so the
+# script also works on a machine where it does not exist yet
+out_dir=/tmp/fabio
+mkdir -p "$out_dir" || exit 1
+map_1st=$out_dir/map.vm
+map_2nd=$out_dir/map_2nd.vm
+
+# process both rounds even if the first one fails, but report any failure
+# through the exit status of the script
+status=0
+combine_and_check_maps "$run_dir" "main" "$map_1st" || status=1
+combine_and_check_maps "$run_dir" "spliced" "$map_2nd" || status=1
+exit "$status"