updated NN functions so that they return the distances as well as the indices
[RBC.git] / brute.cu
1 /* This file is part of the Random Ball Cover (RBC) library.
2 * (C) Copyright 2010, Lawrence Cayton [lcayton@tuebingen.mpg.de]
3 */
4
5 #ifndef BRUTE_CU
6 #define BRUTE_CU
7
8 #include "utilsGPU.h"
9 #include "utils.h"
10 #include "rbc.h"
11 #include "defs.h"
12 #include "kernels.h"
13 #include "kernelWrap.h"
14 #include "brute.h"
15 #include<stdio.h>
16 #include<cuda.h>
17 //#include<gsl/gsl_sort.h>
18
19 void bruteRangeCount(matrix x, matrix q, real *ranges, unint *cnts){
20 matrix dx, dq;
21 real *dranges;
22 unint *dcnts;
23
24 copyAndMove(&dx, &x);
25 copyAndMove(&dq, &q);
26
27 checkErr( cudaMalloc( (void**)&dranges, q.pr*sizeof(*dranges) ) );
28 cudaMemcpy( dranges, ranges, q.r*sizeof(*dranges), cudaMemcpyHostToDevice );
29
30 checkErr( cudaMalloc( (void**)&dcnts, q.pr*sizeof(*dcnts) ) );
31
32 rangeCountWrap(dq, dx, dranges, dcnts);
33
34 cudaMemcpy(cnts, dcnts, q.r*sizeof(*cnts), cudaMemcpyDeviceToHost );
35
36 cudaFree(dcnts);
37 cudaFree(dranges);
38 cudaFree(dx.mat);
39 cudaFree(dq.mat);
40 }
41
42
43 void bruteSearch(matrix x, matrix q, unint *NNs){
44 real *dMins;
45 unint *dMinIDs;
46 matrix dx, dq;
47
48
49 dx.r=x.r; dx.pr=x.pr; dx.c=x.c; dx.pc=x.pc; dx.ld=x.ld;
50 dq.r=q.r; dq.pr=q.pr; dq.c=q.c; dq.pc=q.pc; dq.ld=q.ld;
51
52 checkErr( cudaMalloc((void**)&dMins, q.pr*sizeof(*dMins)) );
53 checkErr( cudaMalloc((void**)&dMinIDs, q.pr*sizeof(*dMinIDs)) );
54 checkErr( cudaMalloc((void**)&dx.mat, dx.pr*dx.pc*sizeof(*dx.mat)) );
55 checkErr( cudaMalloc((void**)&dq.mat, dq.pr*dq.pc*sizeof(*dq.mat)) );
56
57 cudaMemcpy(dx.mat,x.mat,x.pr*x.pc*sizeof(*dx.mat),cudaMemcpyHostToDevice);
58 cudaMemcpy(dq.mat,q.mat,q.pr*q.pc*sizeof(*dq.mat),cudaMemcpyHostToDevice);
59
60 nnWrap(dq,dx,dMins,dMinIDs);
61
62 cudaMemcpy(NNs,dMinIDs,dq.r*sizeof(*NNs),cudaMemcpyDeviceToHost);
63
64 cudaFree(dMins);
65 cudaFree(dMinIDs);
66 cudaFree(dx.mat);
67 cudaFree(dq.mat);
68 }
69
70
71 void bruteK(matrix x, matrix q, intMatrix NNs, matrix NNdists){
72 matrix dNNdists;
73 intMatrix dMinIDs;
74 matrix dx, dq;
75
76 dx.r=x.r; dx.pr=x.pr; dx.c=x.c; dx.pc=x.pc; dx.ld=x.ld;
77 dq.r=q.r; dq.pr=q.pr; dq.c=q.c; dq.pc=q.pc; dq.ld=q.ld;
78 dNNdists.r=q.r; dNNdists.pr=q.pr; dNNdists.c=K; dNNdists.pc=K; dNNdists.ld=dNNdists.pc;
79 dMinIDs.r=q.r; dMinIDs.pr=q.pr; dMinIDs.c=K; dMinIDs.pc=K; dMinIDs.ld=dMinIDs.pc;
80
81 checkErr( cudaMalloc((void**)&dNNdists.mat, dNNdists.pc*dNNdists.pr*sizeof(*dNNdists.mat)) );
82 checkErr( cudaMalloc((void**)&dMinIDs.mat, dMinIDs.pc*dMinIDs.pr*sizeof(*dMinIDs.mat)) );
83 checkErr( cudaMalloc((void**)&dx.mat, dx.pr*dx.pc*sizeof(*dx.mat)) );
84 checkErr( cudaMalloc((void**)&dq.mat, dq.pr*dq.pc*sizeof(*dq.mat)) );
85
86 cudaMemcpy(dx.mat,x.mat,x.pr*x.pc*sizeof(*dx.mat),cudaMemcpyHostToDevice);
87 cudaMemcpy(dq.mat,q.mat,q.pr*q.pc*sizeof(*dq.mat),cudaMemcpyHostToDevice);
88
89 knnWrap(dq,dx,dNNdists,dMinIDs);
90
91 cudaMemcpy(NNs.mat,dMinIDs.mat,NNs.pr*NNs.pc*sizeof(*NNs.mat),cudaMemcpyDeviceToHost);
92 cudaMemcpy(NNdists.mat,dNNdists.mat,NNdists.pr*NNdists.pc*sizeof(*NNdists.mat),cudaMemcpyDeviceToHost);
93
94 cudaFree(dNNdists.mat);
95 cudaFree(dMinIDs.mat);
96 cudaFree(dx.mat);
97 cudaFree(dq.mat);
98 }
99
100
101 void bruteCPU(matrix X, matrix Q, unint *NNs){
102 real *dtoNNs;
103 real temp;
104
105 unint i, j;
106
107 dtoNNs = (real*)calloc(Q.r,sizeof(*dtoNNs));
108
109 for( i=0; i<Q.r; i++ ){
110 dtoNNs[i] = MAX_REAL;
111 NNs[i] = 0;
112 for(j=0; j<X.r; j++ ){
113 temp = distVec( Q, X, i, j );
114 if( temp < dtoNNs[i]){
115 NNs[i] = j;
116 dtoNNs[i] = temp;
117 }
118 }
119 }
120
121 free(dtoNNs);
122 }
123
124
125 //The following method works properly, but requires the GNU scientific
126 //library. If you want to use it, uncomment the code, uncomment the include
127 //above, and adjust the makefile. It was used for debugging purposes, but
128 //is not required by anything else.
129
130 /* void bruteKCPU(matrix x, matrix q, intMatrix NNs){ */
131 /* int i, j; */
132
133 /* float **d; */
134 /* d = (float**)calloc(q.pr, sizeof(*d)); */
135 /* size_t **t; */
136 /* t = (size_t**)calloc(q.pr, sizeof(*t)); */
137 /* for( i=0; i<q.pr; i++){ */
138 /* d[i] = (float*)calloc(x.pr, sizeof(**d)); */
139 /* t[i] = (size_t*)calloc(x.pr, sizeof(**t)); */
140 /* } */
141
142 /* for( i=0; i<q.r; i++){ */
143 /* for( j=0; j<x.r; j++) */
144 /* d[i][j] = distVec( q, x, i, j ); */
145 /* gsl_sort_float_index(t[i], d[i], 1, x.r); */
146 /* for ( j=0; j<K; j++) */
147 /* NNs.mat[IDX( i, j, NNs.ld )] = t[i][j]; */
148 /* } */
149
150 /* for( i=0; i<q.pr; i++){ */
151 /* free(t[i]); */
152 /* free(d[i]); */
153 /* } */
154 /* free(t); */
155 /* free(d); */
156 /* } */
157 #endif