updated readme/version.txt
[RBC.git] / brute.cu
1 /* This file is part of the Random Ball Cover (RBC) library.
2 * (C) Copyright 2010, Lawrence Cayton [lcayton@tuebingen.mpg.de]
3 */
4
5 #ifndef BRUTE_CU
6 #define BRUTE_CU
7
8 #include "utilsGPU.h"
9 #include "utils.h"
10 #include "rbc.h"
11 #include "defs.h"
12 #include "kernels.h"
13 #include "kernelWrap.h"
14 #include "brute.h"
15 #include<stdio.h>
16 #include<cuda.h>
17 //#include<gsl/gsl_sort.h>
18
19 void bruteRangeCount(matrix x, matrix q, real *ranges, unint *cnts){
20 matrix dx, dq;
21 real *dranges;
22 unint *dcnts;
23
24 copyAndMove(&dx, &x);
25 copyAndMove(&dq, &q);
26
27 checkErr( cudaMalloc( (void**)&dranges, q.pr*sizeof(*dranges) ) );
28 cudaMemcpy( dranges, ranges, q.r*sizeof(*dranges), cudaMemcpyHostToDevice );
29
30 checkErr( cudaMalloc( (void**)&dcnts, q.pr*sizeof(*dcnts) ) );
31
32 rangeCountWrap(dq, dx, dranges, dcnts);
33
34 cudaMemcpy(cnts, dcnts, q.r*sizeof(*cnts), cudaMemcpyDeviceToHost );
35
36 cudaFree(dcnts);
37 cudaFree(dranges);
38 cudaFree(dx.mat);
39 cudaFree(dq.mat);
40 }
41
42
43 void bruteSearch(matrix x, matrix q, unint *NNs){
44 real *dMins;
45 unint *dMinIDs;
46 matrix dx, dq;
47
48
49 dx.r=x.r; dx.pr=x.pr; dx.c=x.c; dx.pc=x.pc; dx.ld=x.ld;
50 dq.r=q.r; dq.pr=q.pr; dq.c=q.c; dq.pc=q.pc; dq.ld=q.ld;
51
52 checkErr( cudaMalloc((void**)&dMins, q.pr*sizeof(*dMins)) );
53 checkErr( cudaMalloc((void**)&dMinIDs, q.pr*sizeof(*dMinIDs)) );
54 checkErr( cudaMalloc((void**)&dx.mat, dx.pr*dx.pc*sizeof(*dx.mat)) );
55 checkErr( cudaMalloc((void**)&dq.mat, dq.pr*dq.pc*sizeof(*dq.mat)) );
56
57 cudaMemcpy(dx.mat,x.mat,x.pr*x.pc*sizeof(*dx.mat),cudaMemcpyHostToDevice);
58 cudaMemcpy(dq.mat,q.mat,q.pr*q.pc*sizeof(*dq.mat),cudaMemcpyHostToDevice);
59
60 nnWrap(dq,dx,dMins,dMinIDs);
61
62 cudaMemcpy(NNs,dMinIDs,dq.r*sizeof(*NNs),cudaMemcpyDeviceToHost);
63
64 cudaFree(dMins);
65 cudaFree(dMinIDs);
66 cudaFree(dx.mat);
67 cudaFree(dq.mat);
68 }
69
70
71 void bruteK(matrix x, matrix q, intMatrix NNs){
72 matrix dMins;
73 intMatrix dMinIDs;
74 matrix dx, dq;
75
76 dx.r=x.r; dx.pr=x.pr; dx.c=x.c; dx.pc=x.pc; dx.ld=x.ld;
77 dq.r=q.r; dq.pr=q.pr; dq.c=q.c; dq.pc=q.pc; dq.ld=q.ld;
78 dMins.r=q.r; dMins.pr=q.pr; dMins.c=K; dMins.pc=K; dMins.ld=dMins.pc;
79 dMinIDs.r=q.r; dMinIDs.pr=q.pr; dMinIDs.c=K; dMinIDs.pc=K; dMinIDs.ld=dMinIDs.pc;
80
81 checkErr( cudaMalloc((void**)&dMins.mat, dMins.pc*dMins.pr*sizeof(*dMins.mat)) );
82 checkErr( cudaMalloc((void**)&dMinIDs.mat, dMinIDs.pc*dMinIDs.pr*sizeof(*dMinIDs.mat)) );
83 checkErr( cudaMalloc((void**)&dx.mat, dx.pr*dx.pc*sizeof(*dx.mat)) );
84 checkErr( cudaMalloc((void**)&dq.mat, dq.pr*dq.pc*sizeof(*dq.mat)) );
85
86 cudaMemcpy(dx.mat,x.mat,x.pr*x.pc*sizeof(*dx.mat),cudaMemcpyHostToDevice);
87 cudaMemcpy(dq.mat,q.mat,q.pr*q.pc*sizeof(*dq.mat),cudaMemcpyHostToDevice);
88
89 knnWrap(dq,dx,dMins,dMinIDs);
90
91 cudaMemcpy(NNs.mat,dMinIDs.mat,NNs.pr*NNs.pc*sizeof(*NNs.mat),cudaMemcpyDeviceToHost);
92
93 cudaFree(dMins.mat);
94 cudaFree(dMinIDs.mat);
95 cudaFree(dx.mat);
96 cudaFree(dq.mat);
97 }
98
99
100 void bruteCPU(matrix X, matrix Q, unint *NNs){
101 real *dtoNNs;
102 real temp;
103
104 unint i, j;
105
106 dtoNNs = (real*)calloc(Q.r,sizeof(*dtoNNs));
107
108 for( i=0; i<Q.r; i++ ){
109 dtoNNs[i] = MAX_REAL;
110 NNs[i] = 0;
111 for(j=0; j<X.r; j++ ){
112 temp = distVec( Q, X, i, j );
113 if( temp < dtoNNs[i]){
114 NNs[i] = j;
115 dtoNNs[i] = temp;
116 }
117 }
118 }
119
120 free(dtoNNs);
121 }
122
123
124 //The following method works properly, but requires the GNU scientific
125 //library. If you want to use it, uncomment the code, uncomment the include
126 //above, and adjust the makefile. It was used for debugging purposes, but
127 //is not required by anything else.
128
129 /* void bruteKCPU(matrix x, matrix q, intMatrix NNs){ */
130 /* int i, j; */
131
132 /* float **d; */
133 /* d = (float**)calloc(q.pr, sizeof(*d)); */
134 /* size_t **t; */
135 /* t = (size_t**)calloc(q.pr, sizeof(*t)); */
136 /* for( i=0; i<q.pr; i++){ */
137 /* d[i] = (float*)calloc(x.pr, sizeof(**d)); */
138 /* t[i] = (size_t*)calloc(x.pr, sizeof(**t)); */
139 /* } */
140
141 /* for( i=0; i<q.r; i++){ */
142 /* for( j=0; j<x.r; j++) */
143 /* d[i][j] = distVec( q, x, i, j ); */
144 /* gsl_sort_float_index(t[i], d[i], 1, x.r); */
145 /* for ( j=0; j<K; j++) */
146 /* NNs.mat[IDX( i, j, NNs.ld )] = t[i][j]; */
147 /* } */
148
149 /* for( i=0; i<q.pr; i++){ */
150 /* free(t[i]); */
151 /* free(d[i]); */
152 /* } */
153 /* free(t); */
154 /* free(d); */
155 /* } */
156 #endif