+ optimized dataset generation/storage
[qpalma.git] / tools / run_specific_scripts / transcriptome_analysis / createFullMap.sh
1 #!/bin/bash
2
# This script collects the per-lane map.vm files (lanes 1, 2 and 3) from
# /media/oka_raid/backup/data/solexa_analysis/ATH/Transcriptome/Col-0/run_40/
# and combines them into a single map.vm (for the "main" vmatch round) and a
# single map_2nd.vm (for the "spliced" vmatch round).
7
# Prints the number of distinct values found in the first tab-separated
# column of the file given as $1 (this is what the log calls "reads").
_count_unique_reads() {
  local count
  # LC_ALL=C makes sort compare bytes, so two different ids are never merged
  # by locale collation rules and the result is reproducible across machines.
  count=$(cut -f1 -- "$1" | LC_ALL=C sort -u | wc -l) || return
  # Arithmetic expansion strips the padding some wc implementations emit.
  echo "$(( count ))"
}

# Writes the error message $2 to stderr and appends it to the logfile $1.
_report_map_error() {
  printf '%s\n' "$2" >&2
  printf '%s\n' "$2" >> "$1"
}

#######################################
# Concatenates the map.vm files of lanes 1, 2 and 3 of one vmatch round into
# a single file and verifies that the number of distinct first-column entries
# in the combined file equals the sum of the per-lane numbers.
# Arguments:
#   $1 - project (run) directory containing the lane directories 1/, 2/, 3/
#   $2 - vmatch round directory below <lane>/length_38/ ("main" or "spliced")
#   $3 - result file to (over)write, e.g. map.vm or map_2nd.vm
#   $4 - logfile; per-lane and combined counts and errors are appended to it
# Outputs:
#   "processing <file> ..." progress lines on stdout, errors on stderr
# Returns:
#   0 if the combined file was written and the counts agree,
#   1 if a lane file is unreadable, writing fails or the counts disagree,
#   2 if called with the wrong number of arguments
#######################################
combine_and_check_maps() {
  if (( $# != 4 )); then
    echo "usage: combine_and_check_maps run_dir round_dir new_map logfile" >&2
    return 2
  fi

  local run_dir=$1
  local round_dir=$2
  local new_map=$3
  local logfile=$4
  local -a lanes=(1 2 3)
  local lane current_map lane_read_ctr combined_read_ctr
  local combined_lanes_ctr=0

  echo "Starting logfile entries for $new_map" >> "$logfile"

  # Check every input before touching the result file. A missing lane would
  # otherwise contribute 0 to both sides of the consistency check below and
  # slip through unnoticed, leaving a silently truncated combined map.
  for lane in "${lanes[@]}"; do
    current_map=$run_dir/$lane/length_38/$round_dir/map.vm
    if [[ ! -r "$current_map" ]]; then
      _report_map_error "$logfile" "Error: cannot read lane map $current_map"
      return 1
    fi
  done

  # Start from an empty result file so that every lane can simply be appended.
  if ! : > "$new_map"; then
    _report_map_error "$logfile" "Error: cannot write $new_map"
    return 1
  fi

  for lane in "${lanes[@]}"; do
    current_map=$run_dir/$lane/length_38/$round_dir/map.vm
    echo "processing $current_map ..."
    if ! cat -- "$current_map" >> "$new_map"; then
      _report_map_error "$logfile" "Error: cannot append $current_map to $new_map"
      return 1
    fi
    lane_read_ctr=$(_count_unique_reads "$current_map") || return 1
    echo "lane${lane} reads $lane_read_ctr" >> "$logfile"
    combined_lanes_ctr=$(( combined_lanes_ctr + lane_read_ctr ))
  done

  combined_read_ctr=$(_count_unique_reads "$new_map") || return 1
  echo "$new_map reads $combined_read_ctr" >> "$logfile"

  # here we check whether the number of reads of the combined file is the sum
  # of the reads of the single lane files
  if (( combined_lanes_ctr != combined_read_ctr )); then
    _report_map_error "$logfile" \
      "Error during combination of $round_dir vmatch run reads! ($new_map: expected $combined_lanes_ctr, found $combined_read_ctr)"
    return 1
  fi
  return 0
}
47
# Project directory holding the per-lane vmatch results (lanes 1, 2 and 3).
run_dir=/media/oka_raid/backup/data/solexa_analysis/ATH/Transcriptome/Col-0/run_40

#mkdir /tmp/fabio
# All results (both combined maps and the logfile) go into one directory.
out_dir=/fml/ag-raetsch/home/fabio/tmp/transcriptome_data
map_1st=$out_dir/map.vm
map_2nd=$out_dir/map_2nd.vm
logfile=$out_dir/logfile.txt

# Start with a fresh logfile: combine_and_check_maps only ever appends to it.
rm -f -- "$logfile"

# Run both rounds even if the first one fails, but remember any failure so
# that the exit status of the script reflects it.
status=0
combine_and_check_maps "$run_dir" "main" "$map_1st" "$logfile" || status=1
combine_and_check_maps "$run_dir" "spliced" "$map_2nd" "$logfile" || status=1
exit "$status"