minor changes for cuda 3.2
[RBC.git] / driver.cu
1 /* This file is part of the Random Ball Cover (RBC) library.
2 * (C) Copyright 2010, Lawrence Cayton [lcayton@tuebingen.mpg.de]
3 */
4
5 #include<stdio.h>
6 #include<stdlib.h>
7 #include<cuda.h>
8 #include<sys/time.h>
9 #include<math.h>
10 #include "defs.h"
11 #include "utils.h"
12 #include "utilsGPU.h"
13 #include "rbc.h"
14 #include "brute.h"
15 #include "sKernel.h"
16
17 void parseInput(int,char**);
18 void readData(char*,unint,unint,real*);
19 void readDataText(char*,unint,unint,real*);
20 void orgData(real*,unint,unint,matrix,matrix);
21 void evalNNerror(matrix, matrix, unint*);
22 void evalKNNerror(matrix,matrix,intMatrix);
23
24 char *dataFile, *outFile;
25 unint n=0, m=0, d=0, numReps=0, s=0;
26 unint deviceNum=0;
27 int main(int argc, char**argv){
28 real *data;
29 matrix x, q;
30 unint *NNs;
31 intMatrix NNsK, kNNsRBC;
32 unint i;
33 struct timeval tvB,tvE;
34 cudaError_t cE;
35 rbcStruct rbcS;
36
37 printf("*****************\n");
38 printf("RANDOM BALL COVER\n");
39 printf("*****************\n");
40
41 parseInput(argc,argv);
42
43 printf("Using GPU #%d\n",deviceNum);
44 if(cudaSetDevice(deviceNum) != cudaSuccess){
45 printf("Unable to select device %d.. exiting. \n",deviceNum);
46 exit(1);
47 }
48
49 size_t memFree, memTot;
50 cudaMemGetInfo(&memFree, &memTot);
51 printf("GPU memory free = %lu/%lu (MB) \n",(unsigned long)memFree/(1024*1024),(unsigned long)memTot/(1024*1024));
52
53 data = (real*)calloc( (n+m)*d, sizeof(*data) );
54 x.mat = (real*)calloc( PAD(n)*PAD(d), sizeof(*(x.mat)) );
55
56 //Need to allocate extra space, as each group of q will be padded later.
57 q.mat = (real*)calloc( PAD(m)*PAD(d), sizeof(*(q.mat)) );
58 x.r = n; x.c = d; x.pr = PAD(n); x.pc = PAD(d); x.ld = x.pc;
59 q.r = m; q.c = d; q.pr = PAD(m); q.pc = PAD(d); q.ld = q.pc;
60
61 NNs = (unint*)calloc( m, sizeof(*NNs) );
62 for(i=0; i<m; i++)
63 NNs[i]=DUMMY_IDX;
64
65 readData(dataFile, (n+m), d, data);
66 orgData(data, (n+m), d, x, q);
67 free(data);
68
69 NNsK.r=q.r; NNsK.pr=q.pr; NNsK.pc=NNsK.c=K; NNsK.ld=NNsK.pc;
70 kNNsRBC.r=q.r; kNNsRBC.pr=q.pr; kNNsRBC.pc=kNNsRBC.c=K; kNNsRBC.ld=kNNsRBC.pc;
71 kNNsRBC.mat = (unint*)calloc(kNNsRBC.pr*kNNsRBC.pc, sizeof(*kNNsRBC.mat));
72 NNsK.mat = (unint*)calloc(NNsK.pr*NNsK.pc, sizeof(*NNsK.mat));
73
74 /* printf("running k-brute force..\n"); */
75 /* gettimeofday(&tvB,NULL); */
76 /* bruteK(x,q,NNsK); */
77 /* gettimeofday(&tvE,NULL); */
78 /* printf("\t.. time elapsed = %6.4f \n",timeDiff(tvB,tvE)); */
79
80 printf("\nrunning rbc..\n");
81 gettimeofday(&tvB,NULL);
82 buildRBC(x, &rbcS, numReps, s);
83 gettimeofday(&tvE,NULL);
84 printf("\t.. build time for rbc = %6.4f \n",timeDiff(tvB,tvE));
85
86 //This finds the 32-NN; if you are only interested in the 1-NN, use queryRBC(..) instead
87 gettimeofday(&tvB,NULL);
88 kqueryRBC(q, rbcS, kNNsRBC);
89 gettimeofday(&tvE,NULL);
90 printf("\t.. query time for krbc = %6.4f \n",timeDiff(tvB,tvE));
91
92 destroyRBC(&rbcS);
93 printf("finished \n");
94
95 cE = cudaGetLastError();
96 if( cE != cudaSuccess ){
97 printf("Execution failed; error type: %s \n", cudaGetErrorString(cE) );
98 }
99
100 evalKNNerror(x,q,kNNsRBC);
101
102 cudaThreadExit();
103
104 free(NNs);
105 free(NNsK.mat);
106 free(kNNsRBC.mat);
107 free(x.mat);
108 free(q.mat);
109 }
110
111
112 void parseInput(int argc, char **argv){
113 int i=1;
114 if(argc <= 1){
115 printf("\nusage: \n testRBC -f datafile (bin) -n numPts (DB) -m numQueries -d dim -r numReps -s numPtsPerRep [-o outFile] [-g GPU num]\n\n");
116 printf("\tdatafile = binary file containing the data\n");
117 printf("\tnumPts = size of database\n");
118 printf("\tnumQueries = number of queries\n");
119 printf("\tdim = dimensionailty\n");
120 printf("\tnumReps = number of representatives\n");
121 printf("\tnumPtsPerRep = number of points assigned to each representative\n");
122 printf("\toutFile = output file (optional); stored in text format\n");
123 printf("\tGPU num = ID # of the GPU to use (optional) for multi-GPU machines\n");
124 printf("\n\n");
125 exit(0);
126 }
127
128 while(i<argc){
129 if(!strcmp(argv[i], "-f"))
130 dataFile = argv[++i];
131 else if(!strcmp(argv[i], "-n"))
132 n = atoi(argv[++i]);
133 else if(!strcmp(argv[i], "-m"))
134 m = atoi(argv[++i]);
135 else if(!strcmp(argv[i], "-d"))
136 d = atoi(argv[++i]);
137 else if(!strcmp(argv[i], "-r"))
138 numReps = atoi(argv[++i]);
139 else if(!strcmp(argv[i], "-s"))
140 s = atoi(argv[++i]);
141 else if(!strcmp(argv[i], "-o"))
142 outFile = argv[++i];
143 else if(!strcmp(argv[i], "-g"))
144 deviceNum = atoi(argv[++i]);
145 else{
146 fprintf(stderr,"%s : unrecognized option.. exiting\n",argv[i]);
147 exit(1);
148 }
149 i++;
150 }
151
152 if( !n || !m || !d || !numReps || !s || !dataFile){
153 fprintf(stderr,"more arguments needed.. exiting\n");
154 exit(1);
155 }
156
157 if(numReps>n){
158 fprintf(stderr,"can't have more representatives than points.. exiting\n");
159 exit(1);
160 }
161 }
162
163
164 void readData(char *dataFile, unint rows, unint cols, real *data){
165 FILE *fp;
166 unint numRead;
167
168 fp = fopen(dataFile,"r");
169 if(fp==NULL){
170 fprintf(stderr,"error opening file.. exiting\n");
171 exit(1);
172 }
173
174 numRead = fread(data,sizeof(real),rows*cols,fp);
175 if(numRead != rows*cols){
176 fprintf(stderr,"error reading file.. exiting \n");
177 exit(1);
178 }
179 fclose(fp);
180 }
181
182
183 void readDataText(char *dataFile, unint rows, unint cols, real *data){
184 FILE *fp;
185 real t;
186
187 fp = fopen(dataFile,"r");
188 if(fp==NULL){
189 fprintf(stderr,"error opening file.. exiting\n");
190 exit(1);
191 }
192
193 for(int i=0; i<rows; i++){
194 for(int j=0; j<cols; j++){
195 if(fscanf(fp,"%f ", &t)==EOF){
196 fprintf(stderr,"error reading file.. exiting \n");
197 exit(1);
198 }
199 data[IDX( i, j, cols )]=(real)t;
200 }
201 }
202 fclose(fp);
203 }
204
205 //This function splits the data into two matrices, x and q, of
206 //their specified dimensions. The data is split randomly.
207 //It is assumed that the number of rows of data (the parameter n)
208 //is at least as large as x.r+q.r
209 void orgData(real *data, unint n, unint d, matrix x, matrix q){
210
211 unint i,fi,j;
212 unint *p;
213 p = (unint*)calloc(n,sizeof(*p));
214
215 randPerm(n,p);
216
217 for(i=0,fi=0 ; i<x.r ; i++,fi++){
218 for(j=0;j<x.c;j++){
219 x.mat[IDX(i,j,x.ld)] = data[IDX(p[fi],j,d)];
220 }
221 }
222
223 for(i=0 ; i<q.r ; i++,fi++){
224 for(j=0;j<q.c;j++){
225 q.mat[IDX(i,j,q.ld)] = data[IDX(p[fi],j,d)];
226 }
227 }
228
229 free(p);
230 }
231
232
233 //find the error rate of a set of NNs, then print it.
234 void evalNNerror(matrix x, matrix q, unint *NNs){
235 struct timeval tvB, tvE;
236 unint i;
237
238 printf("\nComputing error rates (this might take a while)\n");
239 real *ranges = (real*)calloc(q.pr,sizeof(*ranges));
240 for(i=0;i<q.r;i++){
241 if(NNs[i]>n) printf("error");
242 ranges[i] = distVec(q,x,i,NNs[i]) - 10e-6;
243 }
244
245 unint *cnts = (unint*)calloc(q.pr,sizeof(*cnts));
246 gettimeofday(&tvB,NULL);
247 bruteRangeCount(x,q,ranges,cnts);
248 gettimeofday(&tvE,NULL);
249
250 long int nc=0;
251 for(i=0;i<m;i++){
252 nc += cnts[i];
253 }
254 double mean = ((double)nc)/((double)m);
255 double var = 0.0;
256 for(i=0;i<m;i++) {
257 var += (((double)cnts[i])-mean)*(((double)cnts[i])-mean)/((double)m);
258 }
259 printf("\tavg rank = %6.4f; std dev = %6.4f \n\n", mean, sqrt(var));
260 printf("(range count took %6.4f) \n", timeDiff(tvB, tvE));
261
262 if(outFile){
263 FILE* fp = fopen(outFile, "a");
264 fprintf( fp, "%d %d %6.5f %6.5f \n", numReps, s, mean, sqrt(var) );
265 fclose(fp);
266 }
267
268 free(ranges);
269 free(cnts);
270 }
271
272
273 //evals the error rate of k-nns
274 void evalKNNerror(matrix x, matrix q, intMatrix NNs){
275 struct timeval tvB, tvE;
276 unint i,j,k;
277
278 unint m = q.r;
279 printf("\nComputing error rates (this might take a while)\n");
280
281 unint *ol = (unint*)calloc( q.r, sizeof(*ol) );
282
283 intMatrix NNsB;
284 NNsB.r=q.r; NNsB.pr=q.pr; NNsB.c=NNsB.pc=32; NNsB.ld=NNsB.pc;
285 NNsB.mat = (unint*)calloc( NNsB.pr*NNsB.pc, sizeof(*NNsB.mat) );
286
287 gettimeofday(&tvB,NULL);
288 bruteK(x,q,NNsB);
289 gettimeofday(&tvE,NULL);
290
291 //calc overlap
292 for(i=0; i<m; i++){
293 for(j=0; j<K; j++){
294 for(k=0; k<K; k++){
295 ol[i] += ( NNs.mat[IDX(i, j, NNs.ld)] == NNsB.mat[IDX(i, k, NNsB.ld)] );
296 }
297 }
298 }
299
300 long int nc=0;
301 for(i=0;i<m;i++){
302 nc += ol[i];
303 }
304
305 double mean = ((double)nc)/((double)m);
306 double var = 0.0;
307 for(i=0;i<m;i++) {
308 var += (((double)ol[i])-mean)*(((double)ol[i])-mean)/((double)m);
309 }
310 printf("\tavg overlap = %6.4f/%d; std dev = %6.4f \n", mean, K, sqrt(var));
311
312 FILE* fp;
313 if(outFile){
314 fp = fopen(outFile, "a");
315 fprintf( fp, "%d %d %6.5f %6.5f ", numReps, s, mean, sqrt(var) );
316 }
317
318 real *ranges = (real*)calloc(q.pr,sizeof(*ranges));
319 for(i=0;i<q.r;i++){
320 ranges[i] = distVec(q,x,i,NNs.mat[IDX(i, K-1, NNs.ld)]);
321 }
322
323 unint *cnts = (unint*)calloc(q.pr,sizeof(*cnts));
324 bruteRangeCount(x,q,ranges,cnts);
325
326 nc=0;
327 for(i=0;i<m;i++){
328 nc += cnts[i];
329 }
330 mean = ((double)nc)/((double)m);
331 var = 0.0;
332 for(i=0;i<m;i++) {
333 var += (((double)cnts[i])-mean)*(((double)cnts[i])-mean)/((double)m);
334 }
335 printf("\tavg actual rank of 32nd NN returned by the RBC = %6.4f; std dev = %6.4f \n\n", mean, sqrt(var));
336 printf("(brute k-nn took %6.4f) \n", timeDiff(tvB, tvE));
337
338 if(outFile){
339 fprintf( fp, "%6.5f %6.5f \n", mean, sqrt(var) );
340 fclose(fp);
341 }
342
343 free(cnts);
344 free(ol);
345 free(NNsB.mat);
346 }