5 #include "datastructures.h"
7 void parse_gff(char *filename
, FILE* fid
,struct gene
** allGenes
) {
10 char* chr
= malloc(sizeof(char)*buffer_size
);
11 char* blah
= malloc(sizeof(char)*buffer_size
);
12 char* id
= malloc(sizeof(char)*buffer_size
);
13 char* desc
= malloc(sizeof(char)*buffer_size
);
16 char* xy
= malloc(sizeof(char)*4);
17 char* strand
= malloc(sizeof(char)*4);
18 char* xz
= malloc(sizeof(char)*4);
20 // do one pass through the gff file to determine the number of genes
26 status
= fscanf(fid
,"%s\t%s\t%s\t%d\t%d\t%c\t%c\t%c\t%s\n",chr
,blah
,id
,&start
,&stop
,xy
,strand
,xz
,desc
);
30 if ( status
> 5 && strcmp(id
,"gene")==0)
33 freopen(filename
,"r",fid
);
35 printf("Found %d genes!\n",numGenes
);
37 allGenes
= malloc(sizeof(struct gene
)*numGenes
);
38 struct gene
*currentGene
= gene_alloc();
40 int skippedLinesCounter
= 0;
43 status
= fscanf(fid
,"%s\t%s\t%s\t%d\t%d\t%c\t%c\t%c\t%s\n",chr
,blah
,id
,&start
,&stop
,xy
,strand
,xz
,desc
);
48 skippedLinesCounter
++;
52 if (strcmp(id
,"gene")==0) {
53 if ( currentGene
->start
!= -1)
54 allGenes
[idx
] = currentGene
;
57 currentGene
= gene_alloc();
58 currentGene
->start
= start
;
59 currentGene
->stop
= stop
;
60 currentGene
->strand
= (*strand
);
61 //printf("gene start/stop: %d/%d\n",start,stop);
65 if (strcmp(id
,"exon")==0) {
66 add_exon(currentGene
,start
,stop
);
67 //printf("exon start/stop: %d/%d\n",start,stop);
71 if (strcmp(id
,"pseudogene")==0) {
72 if ( currentGene
->start
!= -1)
73 allGenes
[idx
] = currentGene
;
78 if ( currentGene
->start
!= -1)
79 allGenes
[idx
] = currentGene
;