126f287b1d2341fbbc8fb7b0bac5b4e195df4306
[qpalma.git] / tools / run_specific_scripts / transcriptome_analysis / compare_predictions / compare.c
1 // use macro for GNU getline
2 #define _GNU_SOURCE
3 #include <stdio.h>
4
5 #include <stdlib.h>
6 #include <assert.h>
7 #include <string.h>
8 #include <unistd.h>
9 #include <math.h>
10
11 /*
12 * This program compares two lists of intron positions stemming for example from
13 * a gff file in order to evaluate predictions made by QPalma.
14 *
15 * It is assumed that the first file contains the ground truth and the second
16 * file the predicted positions.
17 *
18 */
19
20 int* gt_intron_starts;
21 int* gt_intron_stops;
22 int gt_size;
23
24 int* pred_intron_starts;
25 int* pred_intron_stops;
26 int pred_size;
27
28 int matching_introns;
29
30 static char* info = "Usage: ./<app name> <input 1 file name> <input 2 file name> <result file name>";
31
32
33 /*
34 * This function compares the intron boundaries of the two files.
35 *
36 */
37
38 void compare_introns() {
39
40 matching_introns = 0;
41
42 int gt_idx = 0;
43 int pred_idx = 0;
44 int gt_start,gt_stop,pred_start,pred_stop;
45
46 printf("sizes are %d/%d\n",gt_size,pred_size);
47
48 while(1) {
49 if (gt_idx == gt_size || pred_idx == pred_size)
50 break;
51
52 gt_start = gt_intron_starts[gt_idx];
53 gt_stop = gt_intron_stops[gt_idx];
54
55 pred_start = pred_intron_starts[pred_idx];
56 pred_stop = pred_intron_stops[pred_idx];
57
58 //printf("pos: %d/%d %d/%d\n",gt_start,gt_stop,pred_start,pred_stop);
59 if (gt_start == pred_start && gt_stop == pred_stop) {
60 matching_introns++;
61 gt_idx++;
62 pred_idx++;
63 continue;
64 }
65
66 // check for all possible overlappings / positions
67 if ( gt_stop <= pred_start ) {
68 gt_idx++;
69 continue;
70 }
71
72 if ( pred_stop <= gt_start ) {
73 pred_idx++;
74 continue;
75 }
76
77 if (gt_stop <= pred_stop && gt_stop >= pred_start) {
78 gt_idx++;
79 continue;
80 }
81
82 if (pred_stop <= gt_stop && pred_stop >= gt_start) {
83 pred_idx++;
84 continue;
85 }
86 }
87 }
88
89
90 /*
91 *
92 *
93 *
94 */
95
96 void load_introns(char* fn, int** starts, int** stops, int* size) {
97
98 FILE *fs = fopen(fn,"r");
99 if(fs == NULL) {
100 printf("Error: Could not open file: %s",fn);
101 exit(EXIT_FAILURE);
102 }
103
104 size_t line_size = 256;
105 char* current_line = malloc(sizeof(char)*line_size);
106 size_t status;
107
108 // we count the number of lines the file has
109 int line_ctr = 0;
110 while( getline(&current_line,&line_size,fs) != -1 ) line_ctr++;
111
112
113 *size = line_ctr;
114 // now we allocate memory to store all positions
115 *starts = malloc(sizeof(int)*line_ctr);
116 *stops = malloc(sizeof(int)*line_ctr);
117
118 if ( *starts == NULL || *stops == NULL )
119 perror("Could not allocate memory for position arrays");
120
121 fs = freopen(fn,"r",fs);
122 if(fs == NULL) {
123 printf("Error: Could not open file: %s",fn);
124 exit(EXIT_FAILURE);
125 }
126
127 char* first_num = malloc(sizeof(char)*line_size);
128 char* second_num = malloc(sizeof(char)*line_size);
129
130 int idx;
131 int intron_start;
132 int intron_stop;
133 int ctr = 0;
134
135 while(1) {
136 status = getline(&current_line,&line_size,fs);
137 if ( status == -1 )
138 break;
139
140 //printf("%s",current_line);
141
142 for(idx=0;idx<line_size;idx++) {
143 if (current_line[idx] == ' ')
144 break;
145 }
146 strncpy(first_num,current_line,idx);
147 strncpy(second_num,&current_line[idx],line_size-idx);
148 intron_start = atoi(first_num);
149 intron_stop = atoi(second_num);
150 //printf("start/stop: %d/%d\n",intron_start,intron_stop);
151
152 (*starts)[ctr] = intron_start;
153 (*stops)[ctr] = intron_stop;
154 ctr++;
155 }
156
157 printf("ctr is %d\n",ctr);
158 if (fclose(fs) != 0)
159 perror("Closing of filestream failed!");
160 }
161
162 int main(int argc, char* argv[]) {
163
164 if(argc != 4) {
165 printf("%s\n",info);
166 exit(EXIT_FAILURE);
167 }
168
169 int filenameSize = 256;
170 char* gt_fn = malloc(sizeof(char)*filenameSize);
171 char* pred_fn = malloc(sizeof(char)*filenameSize);
172 char* result_fn= malloc(sizeof(char)*filenameSize);
173
174 if ( gt_fn == NULL || pred_fn == NULL || result_fn == NULL)
175 perror("Could not allocate memory for filenames");
176
177 strncpy(gt_fn,argv[1],strlen(argv[1]));
178 strncpy(pred_fn,argv[2],strlen(argv[2]));
179 strncpy(result_fn,argv[3],strlen(argv[3]));
180
181 FILE *result_fs = fopen(result_fn,"r");
182 if(result_fs == NULL) {
183 printf("Error: Could not open file: %s",result_fn);
184 exit(EXIT_FAILURE);
185 }
186
187
188 load_introns(gt_fn,&gt_intron_starts,&gt_intron_stops,&gt_size);
189 load_introns(pred_fn,&pred_intron_starts,&pred_intron_stops,&pred_size);
190
191 compare_introns();
192
193 int f_status = fclose(result_fs);
194 if(f_status != 0)
195 printf("closing of gff filestream failed!\n");
196
197 free(gt_fn);
198 free(pred_fn);
199 free(result_fn);
200
201 printf("Found %d matching intron(s).\n",matching_introns);
202 exit(EXIT_SUCCESS);
203 }
204