+ added framework code for training modus
[qpalma.git] / scripts / est2gff.sh
#!/bin/bash
#
# est2gff.sh
#
# Runs the external psl2gff tool over chromosomes CHR1..CHR5 for two alignment
# sets (the full set and the coverage-filtered set) and collects, per set, the
# "check_psl" report lines associated with 'gt..\.' and 'gc..\.' matches into
# histogram files in the current working directory:
#
#   gt_dpscores.hist                    gc_dpscores.hist
#   gt_dpscores.hist.coverage_filtered  gc_dpscores.hist.coverage_filtered
#
# Finally the two gt files are sorted and diffed into ./DIFF.
#
# Usage: est2gff.sh   (no arguments; all inputs are the fixed paths below)
#
# The script's exit status is the exit status of the final diff
# (0 = identical, 1 = differences found, >1 = trouble).

# Only -u is enabled: grep (no match) and diff (files differ) legitimately
# return 1 in this script, so -e / pipefail would abort on normal results.
set -u

# External converter; it is also handed the two scratch file names
# psl2gff.result / psl2gff_2.result on every invocation.
readonly PSL2GFF=/fml/ag-raetsch/home/fabio/svn/projects/splicing/gff/bin/psl2gff

g_config=/fml/ag-raetsch/share/databases/genomes/A_thaliana/arabidopsis_tair7/genebuild/genome.config

readonly -a CHROMOSOMES=(CHR1 CHR2 CHR3 CHR4 CHR5)

#######################################
# Run psl2gff for one chromosome and append the matching check_psl lines.
#
# Filter chain applied to psl2gff's stdout:
#   1. keep every line containing '###' plus the line following it,
#   2. of those, keep lines matching '<motif>..\.' plus the line before,
#   3. of those, keep only lines containing 'check_psl'.
#
# To store only the text between the first '(' and the following ')' of each
# kept line, append:  | cut -f2 -d '(' | cut -f1 -d ')'
#
# Globals:   PSL2GFF, g_config (read)
# Arguments: $1 - alignment file
#            $2 - chromosome name (e.g. CHR1)
#            $3 - motif prefix used in the grep pattern ("gt" or "gc")
#            $4 - output file to append to
#######################################
append_check_psl_lines() {
  local alignments=$1
  local chr=$2
  local motif=$3
  local out_file=$4

  "$PSL2GFF" EST "$alignments" "$chr" + psl2gff.result psl2gff_2.result "$g_config" \
    | grep -A1 '###' \
    | grep -B1 "${motif}"'..\.' \
    | grep check_psl >> "$out_file"
}

#######################################
# Rebuild the gt/gc histogram files for one alignment set.
#
# Globals:   CHROMOSOMES (read)
# Arguments: $1 - alignment file
#            $2 - suffix appended to both output file names (may be empty)
# Outputs:   gt_dpscores.hist<suffix> and gc_dpscores.hist<suffix>
#######################################
collect_dpscores() {
  local alignments=$1
  local suffix=$2
  local gt_hist="gt_dpscores.hist${suffix}"
  local gc_hist="gc_dpscores.hist${suffix}"
  local chr

  # Start from a clean slate; the per-chromosome runs append.
  rm -f -- "$gt_hist" "$gc_hist"

  for chr in "${CHROMOSOMES[@]}"; do
    # NOTE(review): psl2gff is invoked twice per chromosome with identical
    # arguments. If its output is deterministic, one run could feed both
    # filters - confirm before changing.
    append_check_psl_lines "$alignments" "$chr" gt "$gt_hist"
    append_check_psl_lines "$alignments" "$chr" gc "$gc_hist"
  done
}

# First we count the numbers of incorrect gt and gc positions for the full
# alignment set not filtered by coverage numbers etc.
collect_dpscores \
  /fml/ag-raetsch/home/fabio/tmp/transcriptome_data/full.align.consistent.unique \
  ""

# Now we determine the numbers of incorrect gt and gc positions for the full
# alignment set FILTERED by coverage numbers.
collect_dpscores \
  /fml/ag-raetsch/home/fabio/tmp/transcriptome_data/full.align.coverage_filtered \
  ".coverage_filtered"

# Compare the gt results of the two alignment sets.
# NOTE(review): only the gt files are sorted and compared; the gc files are
# produced above but not used here - confirm whether that is intended.
sort gt_dpscores.hist > gt_dpscores.hist.sorted
sort gt_dpscores.hist.coverage_filtered > gt_dpscores.hist.coverage_filtered.sorted

# NOTE(review): --suppress-common-lines is documented by GNU diff as a
# side-by-side (-y) option; kept as in the original - confirm it is needed.
diff --suppress-common-lines gt_dpscores.hist.coverage_filtered.sorted gt_dpscores.hist.sorted > DIFF