Modified the scan kernel so that it can work with > 1024*1024 numbers
[RBC.git] / driver.cu
1 #include<stdio.h>
2 #include<stdlib.h>
3 #include<cuda.h>
4 #include<sys/time.h>
5 #include<math.h>
6 #include "defs.h"
7 #include "utils.h"
8 #include "utilsGPU.h"
9 #include "rbc.h"
10 #include "brute.h"
11 #include "sKernel.h"
12
/* Forward declarations of the helper routines defined further down in this file. */
void parseInput(int argc, char **argv);
void readData(char *dataFile, int rows, int cols, real *data);
void orgData(real *data, int n, int d, matrix x, matrix q);
16
17
/* Run settings; parseInput() fills these in from the command line. */
char *dataFile;     /* -f : binary data file to read */
char *outFile;      /* -o : optional file that results are appended to */
int n = 0;          /* -n : number of database points */
int m = 0;          /* -m : number of queries */
int d = 0;          /* -d : dimension of each point */
int numReps = 0;    /* -r : number of representatives */
int s = 0;          /* -s : number of points per representative */
int deviceNum = 0;  /* -g : index of the GPU to use */
/*
 * Entry point of the RBC test driver.
 *
 * Steps:
 *   1. parse the command line (parseInput) and select the requested GPU;
 *   2. read (n+m) rows of d values from dataFile and split them into the
 *      database matrix x (n rows) and the query matrix q (m rows);
 *   3. run rbc() and time it;
 *   4. call bruteRangeCount() with one range per query and print the mean and
 *      standard deviation of the returned counts (reported as "avg rank");
 *   5. if an output file was given, append "numReps s mean stddev" to it.
 *
 * Returns 0 on completion; exits with status 1 on device-selection or host
 * allocation failure.
 */
int main(int argc, char**argv){
  real *data;
  matrix x, q;
  int *NNs, *NNsBrute;
  int i;
  struct timeval tvB,tvE;
  cudaError_t cE;

  printf("*****************\n");
  printf("RANDOM BALL COVER\n");
  printf("*****************\n");

  parseInput(argc,argv);

  printf("Using GPU #%d\n",deviceNum);
  if(cudaSetDevice(deviceNum) != cudaSuccess){
    printf("Unable to select device %d.. exiting. \n",deviceNum);
    exit(1);
  }

  /* Query memory through the runtime API with size_t counters: the byte
     counts do not fit in 32 bits on devices with more than 4 GB, and no
     separate driver context has to be created (and later destroyed). */
  size_t memFree=0, memTot=0;
  if(cudaMemGetInfo(&memFree, &memTot) == cudaSuccess){
    printf("GPU memory free = %lu/%lu (MB) \n",
           (unsigned long)(memFree/(1024*1024)),
           (unsigned long)(memTot/(1024*1024)));
  }
  else{
    /* Purely informational, so a failure here is reported but not fatal. */
    fprintf(stderr,"Unable to query GPU memory. \n");
  }

  /* Element counts are computed in size_t so that large n, m, d do not
     overflow int before reaching calloc. */
  data  = (real*)calloc( ((size_t)n+(size_t)m)*(size_t)d, sizeof(*data) );
  x.mat = (real*)calloc( (size_t)PAD(n)*(size_t)PAD(d), sizeof(*(x.mat)) );

  //Need to allocate extra space, as each group of q will be padded later.
  q.mat = (real*)calloc( (size_t)PAD(m)*(size_t)PAD(d), sizeof(*(q.mat)) );
  x.r = n; x.c = d; x.pr = PAD(n); x.pc = PAD(d); x.ld = x.pc;
  q.r = m; q.c = d; q.pr = PAD(m); q.pc = PAD(d); q.ld = q.pc;

  NNs = (int*)calloc( m, sizeof(*NNs) );
  NNsBrute = (int*)calloc( m, sizeof(*NNsBrute) );

  if(data==NULL || x.mat==NULL || q.mat==NULL || NNs==NULL || NNsBrute==NULL){
    fprintf(stderr,"unable to allocate host memory.. exiting\n");
    exit(1);
  }

  readData(dataFile, (n+m), d, data);
  orgData(data, (n+m), d, x, q);
  free(data);

  /* printf("db:\n"); */
  /* printMat(x); */
  /* printf("\nqueries: \n"); */
  /* printMat(q); */
  /* printf("\n\n"); */

  /* Mark every result slot as "not found yet" before any search runs. */
  for(i=0;i<m;i++)
    NNs[i]=NNsBrute[i]=DUMMY_IDX;

  /* printf("running brute force..\n"); */
  /* gettimeofday(&tvB,NULL); */
  /* bruteSearch(x,q,NNsBrute); */
  /* gettimeofday(&tvE,NULL); */
  /* printf("\t.. time elapsed = %6.4f \n",timeDiff(tvB,tvE)); */

  printf("\nrunning rbc..\n");
  gettimeofday(&tvB,NULL);
  rbc(x,q,numReps,s,NNs);
  gettimeofday(&tvE,NULL);
  printf("\t.. total time elapsed for rbc = %6.4f \n",timeDiff(tvB,tvE));
  printf("finished \n");

  cE = cudaGetLastError();
  if( cE != cudaSuccess ){
    printf("Execution failed; error type: %s \n", cudaGetErrorString(cE) );
  }

  printf("\nComputing error rates (this might take a while)\n");
  real *ranges = (real*)calloc(q.pr,sizeof(*ranges));
  int *cnts = (int*)calloc(q.pr,sizeof(*cnts));
  if(ranges==NULL || cnts==NULL){
    fprintf(stderr,"unable to allocate host memory.. exiting\n");
    exit(1);
  }

  /* One range per query: the distance to the neighbor rbc() returned, shrunk
     slightly.  NOTE(review): presumably the shrink keeps the returned point
     itself out of the count -- confirm against bruteRangeCount(). */
  for(i=0;i<q.r;i++)
    ranges[i] = distL1(q,x,i,NNs[i]) - 10e-6;

  gettimeofday(&tvB,NULL);
  bruteRangeCount(x,q,ranges,cnts);
  gettimeofday(&tvE,NULL);

  /* Mean and (population) variance of the per-query counts. */
  long int nc=0;
  for(i=0;i<m;i++){
    nc += cnts[i];
  }
  double mean = ((double)nc)/((double)m);
  double var = 0.0;
  for(i=0;i<m;i++) {
    var += (((double)cnts[i])-mean)*(((double)cnts[i])-mean)/((double)m);
  }
  printf("\tavg rank = %6.4f; std dev = %6.4f \n\n", mean, sqrt(var));
  printf("(range count took %6.4f) \n", timeDiff(tvB, tvE));

  if(outFile){
    FILE* fp = fopen(outFile, "a");
    if(fp == NULL){
      /* Results were already printed above; losing the log line is not fatal. */
      fprintf(stderr,"unable to open output file %s \n", outFile);
    }
    else{
      fprintf( fp, "%d %d %6.5f %6.5f \n", numReps, s, mean, sqrt(var) );
      fclose(fp);
    }
  }

  free(ranges);
  free(cnts);
  free(NNs);
  free(NNsBrute);
  free(x.mat);
  free(q.mat);
  return 0;
}
135
136
/*
 * Returns the value that follows the option at argv[*i] and advances *i onto
 * it.  Exits with status 1 if the option is the last argument, so callers
 * never receive argv[argc] (a NULL pointer).
 */
static char *optionValue(int argc, char **argv, int *i){
  if(*i + 1 >= argc){
    fprintf(stderr,"%s : missing value.. exiting\n",argv[*i]);
    exit(1);
  }
  (*i)++;
  return argv[*i];
}


/*
 * Parses the command line into the file-scope settings dataFile, n, m, d,
 * numReps, s, outFile and deviceNum.
 *
 * With no arguments the usage text is printed and the program exits with
 * status 0.  An unrecognized option, an option without a value, a missing or
 * non-positive required setting, or numReps > n ends the program with
 * status 1.
 */
void parseInput(int argc, char **argv){
  int i=1;
  if(argc <= 1){
    printf("\nusage: \n testRBC -f datafile (bin) -n numPts (DB) -m numQueries -d dim -r numReps -s numPtsPerRep [-o outFile] [-g GPU num]\n\n");
    exit(0);
  }

  while(i<argc){
    if(!strcmp(argv[i], "-f"))
      dataFile = optionValue(argc,argv,&i);
    else if(!strcmp(argv[i], "-n"))
      n = atoi(optionValue(argc,argv,&i));
    else if(!strcmp(argv[i], "-m"))
      m = atoi(optionValue(argc,argv,&i));
    else if(!strcmp(argv[i], "-d"))
      d = atoi(optionValue(argc,argv,&i));
    else if(!strcmp(argv[i], "-r"))
      numReps = atoi(optionValue(argc,argv,&i));
    else if(!strcmp(argv[i], "-s"))
      s = atoi(optionValue(argc,argv,&i));
    else if(!strcmp(argv[i], "-o"))
      outFile = optionValue(argc,argv,&i);
    else if(!strcmp(argv[i], "-g"))
      deviceNum = atoi(optionValue(argc,argv,&i));
    else{
      fprintf(stderr,"%s : unrecognized option.. exiting\n",argv[i]);
      exit(1);
    }
    i++;
  }

  if( !n || !m || !d || !numReps || !s || !dataFile){
    fprintf(stderr,"more arguments needed.. exiting\n");
    exit(1);
  }

  /* These values are later used as array sizes, so negatives must not pass. */
  if( n<0 || m<0 || d<0 || numReps<0 || s<0 ){
    fprintf(stderr,"numPts, numQueries, dim, numReps and numPtsPerRep must be positive.. exiting\n");
    exit(1);
  }

  if(numReps>n){
    fprintf(stderr,"can't have more representatives than points.. exiting\n");
    exit(1);
  }
}
178
179
/*
 * Reads rows*cols values of type real from the binary file dataFile into
 * data, which the caller must have sized for at least that many elements.
 *
 * Exits with status 1 if the file cannot be opened, if rows or cols is
 * negative, or if fewer than rows*cols elements could be read.
 */
void readData(char *dataFile, int rows, int cols, real *data){
  FILE *fp;
  size_t numWanted, numRead;

  /* A negative dimension would turn into a huge unsigned element count. */
  if(rows < 0 || cols < 0){
    fprintf(stderr,"error reading file.. exiting \n");
    exit(1);
  }

  /* "rb": the file holds raw binary values, so no text-mode translation. */
  fp = fopen(dataFile,"rb");
  if(fp==NULL){
    fprintf(stderr,"error opening file.. exiting\n");
    exit(1);
  }

  /* Count in size_t: rows*cols can exceed INT_MAX, and fread returns size_t. */
  numWanted = (size_t)rows * (size_t)cols;
  numRead = fread(data,sizeof(real),numWanted,fp);
  if(numRead != numWanted){
    fclose(fp);
    fprintf(stderr,"error reading file.. exiting \n");
    exit(1);
  }
  fclose(fp);
}
197
198
//This function splits the data into two matrices, x and q, of
//their specified dimensions.  Rows are picked through the index
//array p filled in by randPerm(n,p): the first x.r entries of p
//select the rows copied into x, the next q.r entries select the
//rows copied into q.
//
//data holds n rows of d values each.  n must be at least x.r+q.r;
//the function exits with status 1 if it is not, or if the index
//array cannot be allocated.
void orgData(real *data, int n, int d, matrix x, matrix q){

  int i,fi,j;
  int *p;

  //Without this check the loops below would read past the end of p.
  if( n < 0 || (long)x.r + (long)q.r > (long)n ){
    fprintf(stderr,"not enough rows of data to fill x and q.. exiting\n");
    exit(1);
  }

  p = (int*)calloc(n,sizeof(*p));
  if( n > 0 && p == NULL ){
    fprintf(stderr,"unable to allocate host memory.. exiting\n");
    exit(1);
  }

  randPerm(n,p);

  //fi keeps running across both loops, so x and q draw from
  //different entries of p.
  for(i=0,fi=0 ; i<x.r ; i++,fi++){
    for(j=0;j<x.c;j++){
      x.mat[IDX(i,j,x.ld)] = data[IDX(p[fi],j,d)];
    }
  }

  for(i=0 ; i<q.r ; i++,fi++){
    for(j=0;j<q.c;j++){
      q.mat[IDX(i,j,q.ld)] = data[IDX(p[fi],j,d)];
    }
  }

  free(p);
}
225