added offset sorting routine
[RBC.git] / driver.cu
1 /* This file is part of the Random Ball Cover (RBC) library.
2 * (C) Copyright 2010, Lawrence Cayton [lcayton@tuebingen.mpg.de]
3 */
4
5 #include<stdio.h>
6 #include<stdlib.h>
7 #include<cuda.h>
8 #include<sys/time.h>
9 #include<math.h>
10 #include "defs.h"
11 #include "utils.h"
12 #include "utilsGPU.h"
13 #include "rbc.h"
14 #include "brute.h"
15 #include "sKernel.h"
16
17 void parseInput(int,char**);
18 void readData(char*,unint,unint,real*);
19 void readDataText(char*,unint,unint,real*);
20 void orgData(real*,unint,unint,matrix,matrix);
21
22
23 char *dataFile, *outFile;
24 unint n=0, m=0, d=0, numReps=0, s=0;
25 unint deviceNum=0;
26 int main(int argc, char**argv){
27 real *data;
28 matrix x, q;
29 unint *NNs;
30 intMatrix NNsK, NNsKCPU;
31 unint i;
32 struct timeval tvB,tvE;
33 cudaError_t cE;
34 rbcStruct rbcS;
35
36 printf("*****************\n");
37 printf("RANDOM BALL COVER\n");
38 printf("*****************\n");
39
40 parseInput(argc,argv);
41
42 cuInit(0);
43 printf("Using GPU #%d\n",deviceNum);
44 if(cudaSetDevice(deviceNum) != cudaSuccess){
45 printf("Unable to select device %d.. exiting. \n",deviceNum);
46 exit(1);
47 }
48
49 unsigned int memFree, memTot;
50 CUcontext pctx;
51 unsigned int flags=0;
52 int device;
53 cudaGetDevice(&device);
54 cuCtxCreate(&pctx,flags,device);
55 cuMemGetInfo(&memFree, &memTot);
56 printf("GPU memory free = %u/%u (MB) \n",memFree/(1024*1024),memTot/(1024*1024));
57
58 data = (real*)calloc( (n+m)*d, sizeof(*data) );
59 x.mat = (real*)calloc( PAD(n)*PAD(d), sizeof(*(x.mat)) );
60
61 //Need to allocate extra space, as each group of q will be padded later.
62 q.mat = (real*)calloc( PAD(m)*PAD(d), sizeof(*(q.mat)) );
63 x.r = n; x.c = d; x.pr = PAD(n); x.pc = PAD(d); x.ld = x.pc;
64 q.r = m; q.c = d; q.pr = PAD(m); q.pc = PAD(d); q.ld = q.pc;
65
66 NNs = (unint*)calloc( m, sizeof(*NNs) );
67 for(i=0; i<m; i++)
68 NNs[i]=DUMMY_IDX;
69
70 readData(dataFile, (n+m), d, data);
71 orgData(data, (n+m), d, x, q);
72 free(data);
73
74 for(i=0;i<m;i++)
75 NNs[i]=DUMMY_IDX;
76
77 NNsK.r=q.r; NNsK.pr=q.pr; NNsK.pc=NNsK.c=K; NNsK.ld=NNsK.pc;
78 NNsKCPU.r=q.r; NNsKCPU.pr=q.pr; NNsKCPU.pc=NNsKCPU.c=K; NNsKCPU.ld=NNsKCPU.pc;
79 NNsK.mat = (unint*)calloc(NNsK.pr*NNsK.pc, sizeof(*NNsK.mat));
80 NNsKCPU.mat = (unint*)calloc(NNsKCPU.pr*NNsKCPU.pc, sizeof(*NNsKCPU.mat));
81
82 int tester[5][5];
83 int k=0;
84 int j;
85 for(i=0;i<5;i++){
86 for(j=0;j<5;j++){
87 tester[i][j]=k++;
88 }
89 }
90
91
92 printf("running k-brute force..\n");
93 gettimeofday(&tvB,NULL);
94 bruteK(x,q,NNsK);
95 gettimeofday(&tvE,NULL);
96 printf("\t.. time elapsed = %6.4f \n",timeDiff(tvB,tvE));
97
98 printf("running regular brute force..\n");
99 gettimeofday(&tvB,NULL);
100 bruteSearch(x,q,NNs);
101 gettimeofday(&tvE,NULL);
102 printf("\t.. time elapsed = %6.4f \n",timeDiff(tvB,tvE));
103
104
105 printf("running cpu version..\n");
106 gettimeofday(&tvB,NULL);
107 bruteKCPU(x,q,NNsKCPU);
108 gettimeofday(&tvE,NULL);
109 printf("\t.. time elapsed = %6.4f \n",timeDiff(tvB,tvE));
110
111
112 /* printf("\nrunning rbc..\n"); */
113 /* gettimeofday(&tvB,NULL); */
114 /* buildRBC(x, &rbcS, numReps, s); */
115 /* gettimeofday(&tvE,NULL); */
116 /* printf("\t.. build time for rbc = %6.4f \n",timeDiff(tvB,tvE)); */
117
118 /* gettimeofday(&tvB,NULL); */
119 /* queryRBC(q, rbcS, NNs); */
120 /* gettimeofday(&tvE,NULL); */
121 /* printf("\t.. query time for rbc = %6.4f \n",timeDiff(tvB,tvE)); */
122
123 /* destroyRBC(&rbcS); */
124 /* printf("finished \n") */;
125
126 cE = cudaGetLastError();
127 if( cE != cudaSuccess ){
128 printf("Execution failed; error type: %s \n", cudaGetErrorString(cE) );
129 }
130
131 for(i=0;i<q.r;i++){
132 int j;
133 for(j=0;j<K;j++){
134 if(NNsK.mat[IDX(i,j,NNsK.ld)] != NNsKCPU.mat[IDX(i,j,NNsKCPU.ld)])
135 printf("%d %d: (%6.2f %6.2f) ",i,j,distVec(q,x,i,NNsK.mat[IDX(i,j,NNsK.ld)]),distVec(q,x,i,NNsKCPU.mat[IDX(i,j,NNsKCPU.ld)]));
136 }
137 /* printf("\n"); */
138 }
139
140 /* printf("\nComputing error rates (this might take a while)\n"); */
141 /* real *ranges = (real*)calloc(q.pr,sizeof(*ranges)); */
142 /* for(i=0;i<q.r;i++){ */
143 /* if(NNs[i]>n) printf("error"); */
144 /* ranges[i] = distVec(q,x,i,NNs[i]) - 10e-6; */
145 /* } */
146
147 /* unint *cnts = (unint*)calloc(q.pr,sizeof(*cnts)); */
148 /* gettimeofday(&tvB,NULL); */
149 /* bruteRangeCount(x,q,ranges,cnts); */
150 /* gettimeofday(&tvE,NULL); */
151
152 /* long int nc=0; */
153 /* for(i=0;i<m;i++){ */
154 /* nc += cnts[i]; */
155 /* } */
156 /* double mean = ((double)nc)/((double)m); */
157 /* double var = 0.0; */
158 /* for(i=0;i<m;i++) { */
159 /* var += (((double)cnts[i])-mean)*(((double)cnts[i])-mean)/((double)m); */
160 /* } */
161 /* printf("\tavg rank = %6.4f; std dev = %6.4f \n\n", mean, sqrt(var)); */
162 /* printf("(range count took %6.4f) \n", timeDiff(tvB, tvE)); */
163
164
165 /* if(outFile){ */
166 /* FILE* fp = fopen(outFile, "a"); */
167 /* fprintf( fp, "%d %d %6.5f %6.5f \n", numReps, s, mean, sqrt(var) ); */
168 /* fclose(fp); */
169 /* } */
170
171 /* free(ranges); */
172 /* free(cnts); */
173 free(NNs);
174 free(NNsK.mat);
175 free(NNsKCPU.mat);
176 free(x.mat);
177 free(q.mat);
178 }
179
180
181 void parseInput(int argc, char **argv){
182 int i=1;
183 if(argc <= 1){
184 printf("\nusage: \n testRBC -f datafile (bin) -n numPts (DB) -m numQueries -d dim -r numReps -s numPtsPerRep [-o outFile] [-g GPU num]\n\n");
185 printf("\tdatafile = binary file containing the data\n");
186 printf("\tnumPts = size of database\n");
187 printf("\tnumQueries = number of queries\n");
188 printf("\tdim = dimensionailty\n");
189 printf("\tnumReps = number of representatives\n");
190 printf("\tnumPtsPerRep = number of points assigned to each representative\n");
191 printf("\toutFile = output file (optional); stored in text format\n");
192 printf("\tGPU num = ID # of the GPU to use (optional) for multi-GPU machines\n");
193 printf("\n\n");
194 exit(0);
195 }
196
197 while(i<argc){
198 if(!strcmp(argv[i], "-f"))
199 dataFile = argv[++i];
200 else if(!strcmp(argv[i], "-n"))
201 n = atoi(argv[++i]);
202 else if(!strcmp(argv[i], "-m"))
203 m = atoi(argv[++i]);
204 else if(!strcmp(argv[i], "-d"))
205 d = atoi(argv[++i]);
206 else if(!strcmp(argv[i], "-r"))
207 numReps = atoi(argv[++i]);
208 else if(!strcmp(argv[i], "-s"))
209 s = atoi(argv[++i]);
210 else if(!strcmp(argv[i], "-o"))
211 outFile = argv[++i];
212 else if(!strcmp(argv[i], "-g"))
213 deviceNum = atoi(argv[++i]);
214 else{
215 fprintf(stderr,"%s : unrecognized option.. exiting\n",argv[i]);
216 exit(1);
217 }
218 i++;
219 }
220
221 if( !n || !m || !d || !numReps || !s || !dataFile){
222 fprintf(stderr,"more arguments needed.. exiting\n");
223 exit(1);
224 }
225
226 if(numReps>n){
227 fprintf(stderr,"can't have more representatives than points.. exiting\n");
228 exit(1);
229 }
230 }
231
232
233 void readData(char *dataFile, unint rows, unint cols, real *data){
234 FILE *fp;
235 unint numRead;
236
237 fp = fopen(dataFile,"r");
238 if(fp==NULL){
239 fprintf(stderr,"error opening file.. exiting\n");
240 exit(1);
241 }
242
243 numRead = fread(data,sizeof(real),rows*cols,fp);
244 if(numRead != rows*cols){
245 fprintf(stderr,"error reading file.. exiting \n");
246 exit(1);
247 }
248 fclose(fp);
249 }
250
251
252 void readDataText(char *dataFile, unint rows, unint cols, real *data){
253 FILE *fp;
254 real t;
255
256 fp = fopen(dataFile,"r");
257 if(fp==NULL){
258 fprintf(stderr,"error opening file.. exiting\n");
259 exit(1);
260 }
261
262 for(int i=0; i<rows; i++){
263 for(int j=0; j<cols; j++){
264 if(fscanf(fp,"%f ", &t)==EOF){
265 fprintf(stderr,"error reading file.. exiting \n");
266 exit(1);
267 }
268 data[IDX( i, j, cols )]=(real)t;
269 }
270 }
271 fclose(fp);
272 }
273
274 //This function splits the data into two matrices, x and q, of
275 //their specified dimensions. The data is split randomly.
276 //It is assumed that the number of rows of data (the parameter n)
277 //is at least as large as x.r+q.r
278 void orgData(real *data, unint n, unint d, matrix x, matrix q){
279
280 unint i,fi,j;
281 unint *p;
282 p = (unint*)calloc(n,sizeof(*p));
283
284 randPerm(n,p);
285
286 for(i=0,fi=0 ; i<x.r ; i++,fi++){
287 for(j=0;j<x.c;j++){
288 x.mat[IDX(i,j,x.ld)] = data[IDX(p[fi],j,d)];
289 }
290 }
291
292 for(i=0 ; i<q.r ; i++,fi++){
293 for(j=0;j<q.c;j++){
294 q.mat[IDX(i,j,q.ld)] = data[IDX(p[fi],j,d)];
295 }
296 }
297
298 free(p);
299 }
300