int status;
int filenameSize = 256;
- char *gff_filename = malloc(sizeof(char)*filenameSize);
- char *reads_filename = malloc(sizeof(char)*filenameSize);
- char *output_filename = malloc(sizeof(char)*filenameSize);
+ char* gff_filename = malloc(sizeof(char)*filenameSize);
+ char* reads_filename = malloc(sizeof(char)*filenameSize);
+ char* output_filename = malloc(sizeof(char)*filenameSize);
strncpy(gff_filename,argv[1],filenameSize);
strncpy(reads_filename,argv[2],filenameSize);
struct gene** allGenes;
int numGenes = parse_gff(gff_filename,gff_fs,&allGenes);
status = fclose(gff_fs);
+ free(gff_filename);
if(status != 0)
printf("closing of gff filestream failed!\n");
printf("Successfully parsed gff file! Found %d genes.\n",numGenes);
process_reads(reads_fs,&allGenes,numGenes,out_fs);
-
status = fclose(reads_fs);
status = fclose(out_fs);
if(status != 0)
perror("fclose");
- //free(allGenes);
- free(gff_filename);
free(reads_filename);
+ free(output_filename);
return 0;
}
int skippedLinesCounter = 0;
+ int prev_exon_start = -1;
int prev_exon_stop = -1;
int cur_exon_start = -1;
int exon_idx = 1;
struct gene* currentGene = (*allGenes)[gene_idx];
+ char* disamb_seq = malloc(sizeof(char)*read_size);
+
int readCtr = 0;
- int old_gene_stop = -1;
- int old_pos = 0;
- char* posPtr;
- int new_chr = 0;
- int new_pos = 0;
- char* tmp_line = malloc(sizeof(char)*256);
// start of the parsing loop
while(1) {
if (gene_idx == numGenes || strcmp(current_line,"") == 0)
break;
if (readCtr != 0 && readCtr % 1000000 == 0)
- printf("Processed %d/%d genes and %d/%d reads.\n",readCtr,gene_idx,numGenes,readCtr,numReads);
+ printf("Processed %d reads. Processed %d/%d genes and %d/%d reads.\n",readCtr,gene_idx,numGenes,readCtr,numReads);
//if (gene_idx >= 1833)
// printf("currentGene start/stop: %d/%d. Positions is %d\n",currentGene->start,currentGene->stop,pos);
+ //if (readCtr == 2000000)
+ // exit(EXIT_SUCCESS);
- old_pos = pos;
status = sscanf(current_line,"%d\t%d\t%s\t%lu\t%c\t%d\t%d\t%d\t%d\t%s\t%s\t%s\n",
&chr,&pos,seq,&id,&strand,&mismatch,&occurrence,&size,&cut,prb,cal_prb,chastity);
if (status < 12) {
if (!(currentGene->start <= pos && (pos + read_size-1) <= currentGene->stop)) { // read is not within gene borders
- if ( currentGene->stop < (pos + read_size-1) || currentGene->start < old_gene_stop ) { // go to next gene
+ if ( currentGene->stop < (pos + read_size-1) ) { // go to next gene
gene_idx++;
exon_idx = 1;
- old_gene_stop = currentGene->stop ;
currentGene = (*allGenes)[gene_idx];
//printf("currentGene->start / currentGene->stop %d/%d pos is %d\n",currentGene->start,currentGene->stop,pos);
ue = uo = ds = dov = 0;
continue;
}
- if ( pos < currentGene->start || pos < old_pos) { // go to next read
+ if ( pos < currentGene->start ) { // go to next read
next_read:
- //posPtr = linePtr;
- //while (1) {
- // printf("posPtr points to %c\n",*(char*)posPtr);
- // if ((*(char*)posPtr) == '\n') {
- // posPtr++;
- // tmp_line = strncpy(tmp_line,posPtr,256);
-
- // if (strcmp(tmp_line,"") == 0)
- // break;
-
- // sscanf(tmp_line,"%d\t%d\t",new_chr,new_pos);
- // printf("new_pos %d\n",new_pos);
- // if (new_pos >= currentGene->start) {
- // linePtr = posPtr;
- // break;
- // }
- // }
- // posPtr++;
- //}
- //printf("Went out!\n");
-
while (*(char*)linePtr != '\n') linePtr++;
linePtr++;
readCtr += 1;
current_line = strncpy(current_line,linePtr,256);
-
continue;
}
continue;
}
+ prev_exon_start = currentGene->exon_starts[exon_idx-1];
prev_exon_stop = currentGene->exon_stops[exon_idx-1];
cur_exon_start = currentGene->exon_starts[exon_idx];
+
+ //printf("exon %d %d inton til %d pos %d\n",prev_exon_start,prev_exon_stop,cur_exon_start,pos);
if (cur_exon_start - prev_exon_stop < 6 || cur_exon_start < pos ) { // go to next exon
exon_idx++;
goto exon_label;
}
+ if ( prev_exon_start < pos && (pos+read_size) < prev_exon_stop ) { // read is inside previous exon
+ remove_ambiguities(seq,strlen(seq),disamb_seq);
+ fprintf(out_fs,"%d\t%c\t%s\t%d\t%s\t%s\t%s\n",chr,strand,disamb_seq,read_size,prb,cal_prb,chastity);
+ goto next_read;
+ }
+
if ( pos + (read_size-1) < prev_exon_stop ) // go to next read
goto next_read;
status = munmap(reads_area,reads_filesize);
if(status != 0)
- printf("munmap failed!\n");
+ perror("munmap");
+ //free(current_line);
free(seq);
free(prb);
free(cal_prb);
char* new_prb = malloc(sizeof(char)*buffer_size);
char* new_cal_prb = malloc(sizeof(char)*buffer_size);
char* new_chastity = malloc(sizeof(char)*buffer_size);
+ char* new_up_seq = malloc(sizeof(char)*read_size);
+ char* new_down_seq = malloc(sizeof(char)*read_size);
up_idx=0;
down_idx=0;
&down_chr,&down_pos,down_seq,&down_id,&down_strand,&down_mismatch,&down_occurrence,&down_sz,
&down_cut,down_prb,down_cal_prb,down_chastity);
- char* new_up_seq = malloc(sizeof(char)*read_size);
- char* new_down_seq = malloc(sizeof(char)*read_size);
-
remove_ambiguities(up_seq,strlen(up_seq),new_up_seq);
remove_ambiguities(down_seq,strlen(down_seq),new_down_seq);
up_idx++;
down_idx++;
-
- free(new_up_seq);
- free(new_down_seq);
}
+
+ free(upstream_line);
+ free(downstream_line);
+
+ free(new_up_seq);
+ free(new_down_seq);
+
+ free(up_prb);
+ free(up_cal_prb);
+ free(up_chastity);
+
+ free(down_prb);
+ free(down_cal_prb);
+ free(down_chastity);
+
+ free(new_prb);
+ free(new_cal_prb);
+ free(new_chastity);
+
}
int fitting(char* up_prb, char* up_prb_end, char* down_prb, char* down_prb_end) {