k-nn brute implemented
[RBC.git] / brute.cu
1 /* This file is part of the Random Ball Cover (RBC) library.
2 * (C) Copyright 2010, Lawrence Cayton [lcayton@tuebingen.mpg.de]
3 */
4
5 #ifndef BRUTE_CU
6 #define BRUTE_CU
7
8 #include "utilsGPU.h"
9 #include "utils.h"
10 #include "rbc.h"
11 #include "defs.h"
12 #include "kernels.h"
13 #include "kernelWrap.h"
14 #include "brute.h"
15 #include<stdio.h>
16 #include<cuda.h>
17 #include<gsl/gsl_sort.h>
18
19 void bruteRangeCount(matrix x, matrix q, real *ranges, unint *cnts){
20 matrix dx, dq;
21 real *dranges;
22 unint *dcnts;
23
24 copyAndMove(&dx, &x);
25 copyAndMove(&dq, &q);
26
27 checkErr( cudaMalloc( (void**)&dranges, q.pr*sizeof(*dranges) ) );
28 cudaMemcpy( dranges, ranges, q.r*sizeof(*dranges), cudaMemcpyHostToDevice );
29
30 checkErr( cudaMalloc( (void**)&dcnts, q.pr*sizeof(*dcnts) ) );
31
32 rangeCountWrap(dq, dx, dranges, dcnts);
33
34 cudaMemcpy(cnts, dcnts, q.r*sizeof(*cnts), cudaMemcpyDeviceToHost );
35
36 cudaFree(dcnts);
37 cudaFree(dranges);
38 cudaFree(dx.mat);
39 cudaFree(dq.mat);
40 }
41
42
43 void bruteSearch(matrix x, matrix q, unint *NNs){
44 real *dMins;
45 unint *dMinIDs;
46 matrix dx, dq;
47
48
49 dx.r=x.r; dx.pr=x.pr; dx.c=x.c; dx.pc=x.pc; dx.ld=x.ld;
50 dq.r=q.r; dq.pr=q.pr; dq.c=q.c; dq.pc=q.pc; dq.ld=q.ld;
51
52 checkErr( cudaMalloc((void**)&dMins, q.pr*sizeof(*dMins)) );
53 checkErr( cudaMalloc((void**)&dMinIDs, q.pr*sizeof(*dMinIDs)) );
54 checkErr( cudaMalloc((void**)&dx.mat, dx.pr*dx.pc*sizeof(*dx.mat)) );
55 checkErr( cudaMalloc((void**)&dq.mat, dq.pr*dq.pc*sizeof(*dq.mat)) );
56
57 cudaMemcpy(dx.mat,x.mat,x.pr*x.pc*sizeof(*dx.mat),cudaMemcpyHostToDevice);
58 cudaMemcpy(dq.mat,q.mat,q.pr*q.pc*sizeof(*dq.mat),cudaMemcpyHostToDevice);
59
60 nnWrap(dq,dx,dMins,dMinIDs);
61
62 cudaMemcpy(NNs,dMinIDs,dq.r*sizeof(*NNs),cudaMemcpyDeviceToHost);
63
64 cudaFree(dMins);
65 cudaFree(dMinIDs);
66 cudaFree(dx.mat);
67 cudaFree(dq.mat);
68 }
69
70
71 void bruteK(matrix x, matrix q, intMatrix NNs){
72 matrix dMins;
73 intMatrix dMinIDs;
74 matrix dx, dq;
75
76 dx.r=x.r; dx.pr=x.pr; dx.c=x.c; dx.pc=x.pc; dx.ld=x.ld;
77 dq.r=q.r; dq.pr=q.pr; dq.c=q.c; dq.pc=q.pc; dq.ld=q.ld;
78 dMins.r=q.r; dMins.pr=q.pr; dMins.c=K; dMins.pc=K; dMins.ld=dMins.pc;
79 dMinIDs.r=q.r; dMinIDs.pr=q.pr; dMinIDs.c=K; dMinIDs.pc=K; dMinIDs.ld=dMinIDs.pc;
80
81 checkErr( cudaMalloc((void**)&dMins.mat, dMins.pc*dMins.pr*sizeof(*dMins.mat)) );
82 checkErr( cudaMalloc((void**)&dMinIDs.mat, dMinIDs.pc*dMinIDs.pr*sizeof(*dMinIDs.mat)) );
83 checkErr( cudaMalloc((void**)&dx.mat, dx.pr*dx.pc*sizeof(*dx.mat)) );
84 checkErr( cudaMalloc((void**)&dq.mat, dq.pr*dq.pc*sizeof(*dq.mat)) );
85
86 cudaMemcpy(dx.mat,x.mat,x.pr*x.pc*sizeof(*dx.mat),cudaMemcpyHostToDevice);
87 cudaMemcpy(dq.mat,q.mat,q.pr*q.pc*sizeof(*dq.mat),cudaMemcpyHostToDevice);
88
89 knnWrap(dq,dx,dMins,dMinIDs);
90
91 cudaMemcpy(NNs.mat,dMinIDs.mat,NNs.pr*NNs.pc*sizeof(*NNs.mat),cudaMemcpyDeviceToHost);
92
93 cudaFree(dMins.mat);
94 cudaFree(dMinIDs.mat);
95 cudaFree(dx.mat);
96 cudaFree(dq.mat);
97 }
98
99
100 void bruteCPU(matrix X, matrix Q, unint *NNs){
101 real *dtoNNs;
102 real temp;
103
104 unint i, j;
105
106 dtoNNs = (real*)calloc(Q.r,sizeof(*dtoNNs));
107
108 for( i=0; i<Q.r; i++ ){
109 dtoNNs[i] = MAX_REAL;
110 NNs[i] = 0;
111 for(j=0; j<X.r; j++ ){
112 temp = distVec( Q, X, i, j );
113 if( temp < dtoNNs[i]){
114 NNs[i] = j;
115 dtoNNs[i] = temp;
116 }
117 }
118 }
119
120 free(dtoNNs);
121 }
122
123
124 void bruteKCPU(matrix x, matrix q, intMatrix NNs){
125 int i, j;
126
127 float **d;
128 d = (float**)calloc(q.pr, sizeof(*d));
129 size_t **t;
130 t = (size_t**)calloc(q.pr, sizeof(*t));
131 for( i=0; i<q.pr; i++){
132 d[i] = (float*)calloc(x.pr, sizeof(**d));
133 t[i] = (size_t*)calloc(x.pr, sizeof(**t));
134 }
135
136 //#pragma omp parallel for private(j)
137 for( i=0; i<q.r; i++){
138 for( j=0; j<x.r; j++)
139 d[i][j] = distVec( q, x, i, j );
140 gsl_sort_float_index(t[i], d[i], 1, x.r);
141 for ( j=0; j<K; j++)
142 NNs.mat[IDX( i, j, NNs.ld )] = t[i][j];
143 }
144
145 for( i=0; i<q.pr; i++){
146 free(t[i]);
147 free(d[i]);
148 }
149 free(t);
150 free(d);
151 }
152 #endif