+ added convenient script
author fabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Thu, 14 Aug 2008 10:43:12 +0000 (10:43 +0000)
committer fabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Thu, 14 Aug 2008 10:43:12 +0000 (10:43 +0000)
git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@10370 e1793c9e-67f9-0310-80fc-b846ff1f7b36

tools/run_specific_scripts/transcriptome_analysis/compare_predictions/compare.c
tools/run_specific_scripts/transcriptome_analysis/createExonInfoForGenefinding.py
tools/run_specific_scripts/transcriptome_analysis/createGenefindingInfo.sh [new file with mode: 0755]
tools/run_specific_scripts/transcriptome_analysis/createNewDataset.py

index 46ca423..fe51eaa 100644 (file)
@@ -106,14 +106,12 @@ void load_introns(char* fn, int** starts, int** stops, int* size) {
    size_t status;
 
    // we count the number of lines the file has
-   int line_ctr = 0;
-   while( getline(&current_line,&line_size,fs) != -1 ) line_ctr++;
+   *size = 0;
+   while( getline(&current_line,&line_size,fs) != -1 ) (*size)++;
 
-
-   *size = line_ctr;
    // now we allocate memory to store all positions
-   *starts  = malloc(sizeof(int)*line_ctr);
-   *stops   = malloc(sizeof(int)*line_ctr);
+   *starts  = malloc(sizeof(int)*(*size));
+   *stops   = malloc(sizeof(int)*(*size));
 
    if ( *starts == NULL || *stops == NULL )
       perror("Could not allocate memory for position arrays");
@@ -159,6 +157,7 @@ void load_introns(char* fn, int** starts, int** stops, int* size) {
       perror("Closing of filestream failed!");
 }
 
+
 int main(int argc, char* argv[]) {
 
    if(argc != 4) {
@@ -184,7 +183,6 @@ int main(int argc, char* argv[]) {
       exit(EXIT_FAILURE);
    }
 
-
    load_introns(gt_fn,&gt_intron_starts,&gt_intron_stops,&gt_size);
    load_introns(pred_fn,&pred_intron_starts,&pred_intron_stops,&pred_size);
 
@@ -192,7 +190,7 @@ int main(int argc, char* argv[]) {
 
    int f_status = fclose(result_fs);
    if(f_status != 0)
-      printf("closing of gff filestream failed!\n");
+      printf("closing of result filestream failed!\n");
 
    free(gt_fn);
    free(pred_fn);
@@ -201,4 +199,3 @@ int main(int argc, char* argv[]) {
    printf("Found %d matching intron(s).\n",matching_introns);
    exit(EXIT_SUCCESS);
 }
-
index eaf4fd6..dba0b67 100755 (executable)
@@ -72,5 +72,9 @@ def run(chunk_dir,outfile):
       #out_fh.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\n"%(chromo,strand,pp(starts),pp(ends),str(start_pos),pp(ids),pp(gaps)))
       out_fh.write("%d\t%s\t%s\t%s\t%s\t%s\n"%(chromo,strand,pp(starts),pp(ends),pp(ids),pp(gaps)))
 
+   
+   cmd = 'rm %s' % result_fn
+   os.system(cmd)
+
 if __name__ == '__main__':
    run(sys.argv[1],sys.argv[2])
diff --git a/tools/run_specific_scripts/transcriptome_analysis/createGenefindingInfo.sh b/tools/run_specific_scripts/transcriptome_analysis/createGenefindingInfo.sh
new file mode 100755 (executable)
index 0000000..1d58abe
--- /dev/null
@@ -0,0 +1,31 @@
+#!/bin/bash
+# For each spliced_<idx> run, derive one intron_info_chr<N>_<strand> file ("start stop" per line) from alignmentInfo.genefinding.
+for((idx=1;idx<4;idx++))
+do
+   current_dir=/fml/ag-raetsch/home/fabio/tmp/vmatch_evaluation/spliced_${idx}
+   input=$current_dir/alignment
+   result=$current_dir/alignment/alignmentInfo.genefinding
+   result2=$current_dir/alignment/alignmentInfo.genefinding.chr1_only
+   result3=$current_dir/alignment/alignmentInfo.genefinding.chr1_only.sorted
+   intron_stops=$current_dir/alignment/intron_stops
+   intron_starts=$current_dir/alignment/intron_starts
+   intron_info=$current_dir/alignment/intron_info
+   tab=$'\t'   # field separator of $result; a named tab cannot be silently turned into spaces the way a literal one can
+   #python createExonInfoForGenefinding.py $input $result
+   #cat $result | grep '^1' > $result2
+   #cat $result2 | sort > $result3
+   # $result columns: 1=chromosome 2=strand 3=starts 4=ends (comma lists); rows without a comma are dropped by sed below.
+   for((chromo=1;chromo<6;chromo++))
+   do
+      for strand in "+" "-"
+      do
+         full_intron_stops=${intron_stops}_chr${chromo}_${strand}
+         full_intron_starts=${intron_starts}_chr${chromo}_${strand}
+         full_intron_info=${intron_info}_chr${chromo}_${strand}
+         grep "^${chromo}${tab}${strand}${tab}" "$result" | cut -f3 | sed -e '/[,]\{1,1\}/!d' | cut -d ',' -f2 > "$full_intron_stops"
+         grep "^${chromo}${tab}${strand}${tab}" "$result" | cut -f4 | sed -e '/[,]\{1,1\}/!d' | cut -d ',' -f1 > "$full_intron_starts"
+         paste --delimiters=" " "$full_intron_starts" "$full_intron_stops" > "$full_intron_info"
+         rm "$full_intron_stops" "$full_intron_starts"
+      done
+   done
+done
index 8dea2c5..19cb0c7 100644 (file)
@@ -12,8 +12,11 @@ def run():
 
    main_dir = '/fml/ag-raetsch/home/fabio/tmp/vmatch_evaluation/main'
 
-   spliced_dir = '/fml/ag-raetsch/home/fabio/tmp/vmatch_evaluation/spliced_1'
-   result_dir = '/fml/ag-raetsch/home/fabio/tmp/vmatch_evaluation/spliced_1/dataset'
+   #spliced_dir = '/fml/ag-raetsch/home/fabio/tmp/vmatch_evaluation/spliced_1'
+   #result_dir = '/fml/ag-raetsch/home/fabio/tmp/vmatch_evaluation/spliced_1/dataset'
+
+   spliced_dir = '/fml/ag-raetsch/home/fabio/tmp/vmatch_evaluation/spliced_3'
+   result_dir = '/fml/ag-raetsch/home/fabio/tmp/vmatch_evaluation/spliced_3/dataset'
 
    map_1_fn = jp(main_dir,'map.vm.spliced')
    map_2_fn = jp(spliced_dir,'map.vm')