--- /dev/null
+#!/bin/bash
+
+g_config=/fml/ag-raetsch/share/databases/genomes/A_thaliana/arabidopsis_tair7/genebuild/genome.config
+
+# First we count the numbers of incorrect gt and gc positions for the full
+# alignment set not filtered by coverage numbers etc.
+
+alignment_file=/fml/ag-raetsch/home/fabio/tmp/transcriptome_data/full.align.consistent.unique
+
+touch gt_dpscores.hist && rm gt_dpscores.hist
+touch gc_dpscores.hist && rm gc_dpscores.hist
+
+for CHR in "CHR1" "CHR2" "CHR3" "CHR4" "CHR5"
+do
+ /fml/ag-raetsch/home/fabio/svn/projects/splicing/gff/bin/psl2gff EST $alignment_file $CHR + psl2gff.result psl2gff_2.result $g_config \
+ | grep -A1 '###' | grep -B1 'gt..\.' | grep check_psl >> gt_dpscores.hist
+ #| grep -A1 '###' | grep -B1 'gt..\.' | grep check_psl |cut -f2 -d '(' | cut -f1 -d ')' >> gt_dpscores.hist
+
+ /fml/ag-raetsch/home/fabio/svn/projects/splicing/gff/bin/psl2gff EST $alignment_file $CHR + psl2gff.result psl2gff_2.result $g_config \
+ | grep -A1 '###' | grep -B1 'gc..\.' | grep check_psl >> gc_dpscores.hist
+ #| grep -A1 '###' | grep -B1 'gc..\.' | grep check_psl |cut -f2 -d '(' | cut -f1 -d ')' >> gc_dpscores.hist
+done
+
+# Now we determine the numbers of incorrect gt and gc positions for the full
+# alignment set FILTERED by coverage numbers.
+
+alignment_file=/fml/ag-raetsch/home/fabio/tmp/transcriptome_data/full.align.coverage_filtered
+
+touch gt_dpscores.hist.coverage_filtered && rm gt_dpscores.hist.coverage_filtered
+touch gc_dpscores.hist.coverage_filtered && rm gc_dpscores.hist.coverage_filtered
+
+for CHR in "CHR1" "CHR2" "CHR3" "CHR4" "CHR5"
+do
+ /fml/ag-raetsch/home/fabio/svn/projects/splicing/gff/bin/psl2gff EST $alignment_file $CHR + psl2gff.result psl2gff_2.result $g_config \
+ | grep -A1 '###' | grep -B1 'gt..\.' | grep check_psl >> gt_dpscores.hist.coverage_filtered
+ #| grep -A1 '###' | grep -B1 'gt..\.' | grep check_psl |cut -f2 -d '(' | cut -f1 -d ')' >> gt_dpscores.hist.coverage_filtered
+
+ /fml/ag-raetsch/home/fabio/svn/projects/splicing/gff/bin/psl2gff EST $alignment_file $CHR + psl2gff.result psl2gff_2.result $g_config \
+ | grep -A1 '###' | grep -B1 'gc..\.' | grep check_psl >> gc_dpscores.hist.coverage_filtered
+ #| grep -A1 '###' | grep -B1 'gc..\.' | grep check_psl |cut -f2 -d '(' | cut -f1 -d ')' >> gc_dpscores.hist.coverage_filtered
+done
+
+cat gt_dpscores.hist | sort > gt_dpscores.hist.sorted
+cat gt_dpscores.hist.coverage_filtered | sort > gt_dpscores.hist.coverage_filtered.sorted
+diff --suppress-common-lines gt_dpscores.hist.coverage_filtered.sorted gt_dpscores.hist.sorted > DIFF
--- /dev/null
+#!/bin/bash
+
+#
+# This script takes the alignment output of QPalma and uses the script psl2gff
+# to cluster the reads together in order to infer gene structures.
+#
+# The result is a (are several) gff file(s) that are used as input to the
+# compute_splicegraph script of the genebuild project.
+#
+
+g_config=/fml/ag-raetsch/share/databases/genomes/A_thaliana/arabidopsis_tair7/genebuild/genome.config
+
+# First we count the numbers of incorrect gt and gc positions for the full
+# alignment set not filtered by coverage numbers etc.
+
+alignment_file=/fml/ag-raetsch/home/fabio/tmp/transcriptome_data/full.align.consistent.unique
+
+for CHR in "CHR1" "CHR2" "CHR3" "CHR4" "CHR5"
+do
+ for STRAND in "+" "-"
+ do
+ echo $CHR $STRAND
+ result1_fn=/fml/ag-raetsch/home/fabio/tmp/transcriptome_data/SpliceGraphResults/gff/myexons_${CHR}${STRAND}.gff
+ result2_fn=/dev/null
+
+ /fml/ag-raetsch/home/fabio/svn/projects/splicing/gff/bin/psl2gff EST $alignment_file $CHR ${STRAND} $result1_fn $result2_fn $g_config 1>>LOG
+ done
+done
for c_name,current_chunk in chunks:
current_job = KybJob(grid_predict.g_predict,[run,dataset_fn,current_chunk,param,c_name])
- current_job.h_vmem = '12.0G'
+ current_job.h_vmem = '30.0G'
#current_job.express = 'True'
print "job #1: ", current_job.nativeSpecification
jp = os.path.join
- run_dir = '/fml/ag-raetsch/home/fabio/tmp/newest_run/alignment/run_enable_quality_scores_+_enable_splice_signals_+_enable_intron_length_+'
+ run_dir = '/fml/ag-raetsch/home/fabio/tmp/newest_run/alignment/saved_run'
run = cPickle.load(open(jp(run_dir,'run_obj.pickle')))
+ run['name'] = 'saved_run'
+
param = cPickle.load(open(jp(run_dir,'param_526.pickle')))
dataset_fn = '/fml/ag-raetsch/home/fabio/tmp/transcriptome_data/dataset_transcriptome_run_1.pickle'