5 #include "datastructures.h"
7 int parse_gff(char *filename
, FILE* fid
,struct gene
*** allGenes
) {
10 char* chr
= malloc(sizeof(char)*buffer_size
);
11 char* blah
= malloc(sizeof(char)*buffer_size
);
12 char* id
= malloc(sizeof(char)*buffer_size
);
13 char* desc
= malloc(sizeof(char)*buffer_size
);
16 char* xy
= malloc(sizeof(char)*4);
17 char* strand
= malloc(sizeof(char)*4);
18 char* xz
= malloc(sizeof(char)*4);
20 // do one pass through the gff line to determine the number of
26 status
= fscanf(fid
,"%s\t%s\t%s\t%d\t%d\t%c\t%c\t%c\t%s\n",chr
,blah
,id
,&start
,&stop
,xy
,strand
,xz
,desc
);
30 if ( status
> 5 && strcmp(id
,"gene")==0)
33 freopen(filename
,"r",fid
);
35 //printf("Found %d genes!\n",numGenes);
38 (*allGenes
) = malloc(sizeof(struct gene
*)*numGenes
);
39 (*allGenes
)[idx
] = NULL
;
41 int skippedLinesCounter
= 0;
43 status
= fscanf(fid
,"%s\t%s\t%s\t%d\t%d\t%c\t%c\t%c\t%s\n",chr
,blah
,id
,&start
,&stop
,xy
,strand
,xz
,desc
);
48 skippedLinesCounter
++;
52 if (strcmp(id
,"gene")==0) {
53 if ( (*allGenes
)[idx
] !=NULL
)
56 (*allGenes
)[idx
] = gene_alloc();
57 (*allGenes
)[idx
]->start
= start
;
58 (*allGenes
)[idx
]->stop
= stop
;
59 (*allGenes
)[idx
]->strand
= (*strand
);
60 //printf("gene start/stop: %d/%d\n",start,stop);
64 if (strcmp(id
,"exon")==0) {
65 add_exon((*allGenes
)[idx
],start
,stop
);
66 //printf("exon start/stop: %d/%d\n",start,stop);
70 if (strcmp(id
,"pseudogene")==0) {
71 if ( (*allGenes
)[idx
] !=NULL
)
76 if ( (*allGenes
)[idx
] !=NULL
)
79 //printf("allGenes[0] is %d\n",(*allGenes)[0]);
80 //printf("allGenes[1] is %d\n",(*allGenes)[1]);
81 //printf("Skipped %d lines.\n",skippedLinesCounter);