+ loosened filtering criterion
author fabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Sat, 22 Dec 2007 16:14:34 +0000 (16:14 +0000)
committer fabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Sat, 22 Dec 2007 16:14:34 +0000 (16:14 +0000)
+ fixed memory leaks

git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@7210 e1793c9e-67f9-0310-80fc-b846ff1f7b36

tools/data_tools/filterReads.c

index bbd54b4..49a0bf0 100644 (file)
@@ -40,9 +40,9 @@ int main(int argc, char* argv[]) {
 
    int status;
    int filenameSize = 256;
-   char *gff_filename = malloc(sizeof(char)*filenameSize);
-   char *reads_filename = malloc(sizeof(char)*filenameSize);
-   char *output_filename = malloc(sizeof(char)*filenameSize);
+   char* gff_filename = malloc(sizeof(char)*filenameSize);
+   char* reads_filename = malloc(sizeof(char)*filenameSize);
+   char* output_filename = malloc(sizeof(char)*filenameSize);
 
    strncpy(gff_filename,argv[1],filenameSize);
    strncpy(reads_filename,argv[2],filenameSize);
@@ -70,21 +70,20 @@ int main(int argc, char* argv[]) {
    struct gene** allGenes;
    int numGenes = parse_gff(gff_filename,gff_fs,&allGenes);
    status = fclose(gff_fs);
+   free(gff_filename);
    if(status != 0)
       printf("closing of gff filestream failed!\n");
 
    printf("Successfully parsed gff file! Found %d genes.\n",numGenes);
 
    process_reads(reads_fs,&allGenes,numGenes,out_fs);
-
    status = fclose(reads_fs);
    status = fclose(out_fs);
    if(status != 0)
       perror("fclose");
       
-   //free(allGenes);
-   free(gff_filename);
    free(reads_filename);
+   free(output_filename);
    return 0;
 }
 
@@ -137,6 +136,7 @@ void process_reads(FILE* reads_fs,struct gene*** allGenes,int numGenes, FILE* ou
 
    int skippedLinesCounter = 0;
 
+   int prev_exon_start = -1;
    int prev_exon_stop = -1;
    int cur_exon_start = -1;
 
@@ -145,25 +145,22 @@ void process_reads(FILE* reads_fs,struct gene*** allGenes,int numGenes, FILE* ou
    int exon_idx = 1;
    struct gene* currentGene = (*allGenes)[gene_idx];
 
+   char* disamb_seq = malloc(sizeof(char)*read_size);
+
    int readCtr = 0;
-   int old_gene_stop = -1;
-   int old_pos = 0;
-   char* posPtr;
-   int new_chr = 0;
-   int new_pos = 0;
-   char* tmp_line = malloc(sizeof(char)*256);
    // start of the parsing loop 
    while(1) {
       if (gene_idx == numGenes || strcmp(current_line,"") == 0)
          break;
 
       if (readCtr != 0 && readCtr % 1000000 == 0)
-         printf("Processed %d/%d genes and %d/%d reads.\n",readCtr,gene_idx,numGenes,readCtr,numReads);
+         printf("Processed %d reads. Processed %d/%d genes and %d/%d reads.\n",readCtr,gene_idx,numGenes,readCtr,numReads);
 
       //if (gene_idx >= 1833)
       //   printf("currentGene start/stop: %d/%d. Positions is %d\n",currentGene->start,currentGene->stop,pos);
+      //if (readCtr == 2000000)
+      //   exit(EXIT_SUCCESS);
 
-      old_pos = pos;
       status = sscanf(current_line,"%d\t%d\t%s\t%lu\t%c\t%d\t%d\t%d\t%d\t%s\t%s\t%s\n",
       &chr,&pos,seq,&id,&strand,&mismatch,&occurrence,&size,&cut,prb,cal_prb,chastity);
       if (status < 12) {
@@ -181,45 +178,22 @@ void process_reads(FILE* reads_fs,struct gene*** allGenes,int numGenes, FILE* ou
 
       if (!(currentGene->start <= pos && (pos + read_size-1) <= currentGene->stop)) { // read is not within gene borders
 
-         if ( currentGene->stop < (pos + read_size-1) || currentGene->start < old_gene_stop ) { // go to next gene
+         if ( currentGene->stop < (pos + read_size-1) ) { // go to next gene
             gene_idx++;
             exon_idx = 1;
-            old_gene_stop = currentGene->stop ;
             currentGene = (*allGenes)[gene_idx];
             //printf("currentGene->start / currentGene->stop %d/%d pos is %d\n",currentGene->start,currentGene->stop,pos);
             ue = uo = ds = dov = 0;
             continue;
          }
 
-         if ( pos < currentGene->start || pos < old_pos) { // go to next read
+         if ( pos < currentGene->start ) { // go to next read
             next_read:
 
-            //posPtr = linePtr;
-            //while (1) {
-            //   printf("posPtr points to %c\n",*(char*)posPtr);
-            //   if ((*(char*)posPtr) == '\n') {
-            //      posPtr++;
-            //      tmp_line = strncpy(tmp_line,posPtr,256);
-
-            //      if (strcmp(tmp_line,"") == 0)
-            //         break;
-
-            //      sscanf(tmp_line,"%d\t%d\t",new_chr,new_pos);
-            //      printf("new_pos %d\n",new_pos);
-            //      if (new_pos >= currentGene->start) {
-            //         linePtr = posPtr;
-            //         break;
-            //      }
-            //   }
-            //   posPtr++;
-            //}
-            //printf("Went out!\n");
-
             while (*(char*)linePtr != '\n') linePtr++;
             linePtr++;
             readCtr += 1;
             current_line = strncpy(current_line,linePtr,256);
-            
             continue;
          }
 
@@ -234,8 +208,11 @@ void process_reads(FILE* reads_fs,struct gene*** allGenes,int numGenes, FILE* ou
             continue;
          }
 
+         prev_exon_start = currentGene->exon_starts[exon_idx-1];
          prev_exon_stop = currentGene->exon_stops[exon_idx-1];
          cur_exon_start = currentGene->exon_starts[exon_idx];
+
+         //printf("exon %d %d inton til %d pos %d\n",prev_exon_start,prev_exon_stop,cur_exon_start,pos);
             
          if (cur_exon_start - prev_exon_stop < 6 || cur_exon_start < pos ) { // go to next exon
             exon_idx++;
@@ -250,6 +227,12 @@ void process_reads(FILE* reads_fs,struct gene*** allGenes,int numGenes, FILE* ou
             goto exon_label;
          }
 
+         if ( prev_exon_start < pos && (pos+read_size) < prev_exon_stop ) { // read is inside previous exon
+            remove_ambiguities(seq,strlen(seq),disamb_seq);
+            fprintf(out_fs,"%d\t%c\t%s\t%d\t%s\t%s\t%s\n",chr,strand,disamb_seq,read_size,prb,cal_prb,chastity);
+            goto next_read;
+         }
+
          if ( pos + (read_size-1) < prev_exon_stop ) // go to next read
             goto next_read;
 
@@ -295,8 +278,9 @@ void process_reads(FILE* reads_fs,struct gene*** allGenes,int numGenes, FILE* ou
 
    status = munmap(reads_area,reads_filesize);
    if(status != 0)
-      printf("munmap failed!\n");
+      perror("munmap");
 
+   //free(current_line);
    free(seq);
    free(prb);
    free(cal_prb);
@@ -342,6 +326,8 @@ void combine_info(int exon_stop, int exon_start, void** upstream, int up_size, v
    char* new_prb        = malloc(sizeof(char)*buffer_size);
    char* new_cal_prb    = malloc(sizeof(char)*buffer_size);
    char* new_chastity   = malloc(sizeof(char)*buffer_size);
+   char* new_up_seq = malloc(sizeof(char)*read_size);
+   char* new_down_seq = malloc(sizeof(char)*read_size); 
 
    up_idx=0;
    down_idx=0;
@@ -359,9 +345,6 @@ void combine_info(int exon_stop, int exon_start, void** upstream, int up_size, v
       &down_chr,&down_pos,down_seq,&down_id,&down_strand,&down_mismatch,&down_occurrence,&down_sz,
       &down_cut,down_prb,down_cal_prb,down_chastity);
 
-      char* new_up_seq = malloc(sizeof(char)*read_size);
-      char* new_down_seq = malloc(sizeof(char)*read_size); 
-
       remove_ambiguities(up_seq,strlen(up_seq),new_up_seq);
       remove_ambiguities(down_seq,strlen(down_seq),new_down_seq);
 
@@ -431,10 +414,26 @@ void combine_info(int exon_stop, int exon_start, void** upstream, int up_size, v
 
       up_idx++;
       down_idx++;
-
-      free(new_up_seq);
-      free(new_down_seq);
    }
+
+   free(upstream_line);
+   free(downstream_line);
+  
+   free(new_up_seq);
+   free(new_down_seq); 
+
+   free(up_prb);
+   free(up_cal_prb);
+   free(up_chastity);
+
+   free(down_prb);
+   free(down_cal_prb);
+   free(down_chastity);
+
+   free(new_prb);
+   free(new_cal_prb);
+   free(new_chastity);
+
 }
 
 int fitting(char* up_prb, char* up_prb_end, char* down_prb, char* down_prb_end) {