Stable release of RBC
[RBC.git] / kernelWrap.cu
1 #ifndef KERNELWRAP_CU
2 #define KERNELWRAP_CU
3
4 #include<cuda.h>
5 #include<stdio.h>
6 #include "kernels.h"
7 #include "defs.h"
8
/*
 * Host wrapper: launches dist1Kernel on (dq, dx, dD) and blocks until the
 * device is idle.
 *
 * Launch layout: BLOCK_SIZE x BLOCK_SIZE threads per block; the grid has
 * dx.pr/BLOCK_SIZE blocks along x and dq.pr/BLOCK_SIZE blocks along y.
 * The divisions truncate, so this presumably relies on the pr fields being
 * multiples of BLOCK_SIZE -- TODO confirm against the matrix definition.
 *
 * Failures are reported on stderr; the function still returns normally so
 * existing callers are unaffected.
 */
void dist1Wrap(matrix dq, matrix dx, matrix dD){
  dim3 block(BLOCK_SIZE, BLOCK_SIZE);
  dim3 grid(dx.pr/BLOCK_SIZE, dq.pr/BLOCK_SIZE);

  dist1Kernel<<<grid,block>>>(dq, dx, dD);

  /* Peek (rather than Get) so the runtime's error state is left intact for
     any caller that inspects it afterwards.  Note that this can also surface
     an error left pending by an earlier CUDA call. */
  const cudaError_t launchStatus = cudaPeekAtLastError();
  /* cudaDeviceSynchronize() replaces the deprecated cudaThreadSynchronize(). */
  const cudaError_t syncStatus = cudaDeviceSynchronize();
  const cudaError_t status = (launchStatus != cudaSuccess) ? launchStatus : syncStatus;
  if (status != cudaSuccess)
    fprintf(stderr, "dist1Wrap: CUDA failure: %s\n", cudaGetErrorString(status));
}
16
17
/*
 * Host wrapper: launches findRangeKernel on (dD, dranges, cntWant) and
 * blocks until the device is idle.
 *
 * Launch layout: each block is (4*BLOCK_SIZE) x (BLOCK_SIZE/4) threads, and
 * the grid is 1 block wide and 4*(dD.pr/BLOCK_SIZE) blocks tall.
 * Both BLOCK_SIZE/4 and dD.pr/BLOCK_SIZE truncate, so this presumably
 * expects BLOCK_SIZE divisible by 4 and dD.pr a multiple of BLOCK_SIZE --
 * TODO confirm.
 *
 * Failures are reported on stderr; the function still returns normally so
 * existing callers are unaffected.
 */
void findRangeWrap(matrix dD, real *dranges, int cntWant){
  dim3 block(4*BLOCK_SIZE, BLOCK_SIZE/4);
  dim3 grid(1, 4*(dD.pr/BLOCK_SIZE));

  findRangeKernel<<<grid,block>>>(dD, dranges, cntWant);

  /* Peek (rather than Get) so the runtime's error state is left intact for
     any caller that inspects it afterwards.  Note that this can also surface
     an error left pending by an earlier CUDA call. */
  const cudaError_t launchStatus = cudaPeekAtLastError();
  /* cudaDeviceSynchronize() replaces the deprecated cudaThreadSynchronize(). */
  const cudaError_t syncStatus = cudaDeviceSynchronize();
  const cudaError_t status = (launchStatus != cudaSuccess) ? launchStatus : syncStatus;
  if (status != cudaSuccess)
    fprintf(stderr, "findRangeWrap: CUDA failure: %s\n", cudaGetErrorString(status));
}
25
/*
 * Host wrapper: launches rangeSearchKernel on (dD, dranges, dir) and blocks
 * until the device is idle.
 *
 * Launch layout: BLOCK_SIZE x BLOCK_SIZE threads per block; the grid has
 * dD.pc/BLOCK_SIZE blocks along x and dD.pr/BLOCK_SIZE blocks along y.
 * The divisions truncate, so this presumably relies on pc and pr being
 * multiples of BLOCK_SIZE -- TODO confirm against the matrix definition.
 *
 * Failures are reported on stderr; the function still returns normally so
 * existing callers are unaffected.
 */
void rangeSearchWrap(matrix dD, real *dranges, charMatrix dir){
  dim3 block(BLOCK_SIZE, BLOCK_SIZE);
  dim3 grid(dD.pc/BLOCK_SIZE, dD.pr/BLOCK_SIZE);

  rangeSearchKernel<<<grid,block>>>(dD, dranges, dir);

  /* Peek (rather than Get) so the runtime's error state is left intact for
     any caller that inspects it afterwards.  Note that this can also surface
     an error left pending by an earlier CUDA call. */
  const cudaError_t launchStatus = cudaPeekAtLastError();
  /* cudaDeviceSynchronize() replaces the deprecated cudaThreadSynchronize(). */
  const cudaError_t syncStatus = cudaDeviceSynchronize();
  const cudaError_t status = (launchStatus != cudaSuccess) ? launchStatus : syncStatus;
  if (status != cudaSuccess)
    fprintf(stderr, "rangeSearchWrap: CUDA failure: %s\n", cudaGetErrorString(status));
}
33
/*
 * Host wrapper: launches nnKernel on (dx, dy, dMins, dMinIDs) and blocks
 * until the device is idle.
 *
 * Launch layout: BLOCK_SIZE x BLOCK_SIZE threads per block; the grid is
 * 1 block wide and ceil(dx.pr / BLOCK_SIZE) blocks tall.  Unlike the other
 * wrappers in this file, the row count is rounded up here, so a trailing
 * partial tile still gets a block.
 *
 * Failures are reported on stderr; the function still returns normally so
 * existing callers are unaffected.
 */
void nnWrap(const matrix dx, const matrix dy, real *dMins, int *dMinIDs){
  dim3 dimBlock(BLOCK_SIZE, BLOCK_SIZE);

  /* Round up: add one extra block when dx.pr is not a multiple of the
     block height. */
  dim3 dimGrid(1, dx.pr/dimBlock.y + (dx.pr%dimBlock.y==0 ? 0 : 1));

  nnKernel<<<dimGrid,dimBlock>>>(dx, dy, dMins, dMinIDs);

  /* Peek (rather than Get) so the runtime's error state is left intact for
     any caller that inspects it afterwards.  Note that this can also surface
     an error left pending by an earlier CUDA call. */
  const cudaError_t launchStatus = cudaPeekAtLastError();
  /* cudaDeviceSynchronize() replaces the deprecated cudaThreadSynchronize(). */
  const cudaError_t syncStatus = cudaDeviceSynchronize();
  const cudaError_t status = (launchStatus != cudaSuccess) ? launchStatus : syncStatus;
  if (status != cudaSuccess)
    fprintf(stderr, "nnWrap: CUDA failure: %s\n", cudaGetErrorString(status));
}
43
44
/*
 * Host wrapper: launches rangeCountKernel on (dq, dx, dranges, dcounts) and
 * blocks until the device is idle.
 *
 * Launch layout: BLOCK_SIZE x BLOCK_SIZE threads per block; the grid is
 * 1 block wide and dq.pr/BLOCK_SIZE blocks tall.  The division truncates,
 * so this presumably relies on dq.pr being a multiple of BLOCK_SIZE --
 * TODO confirm against the matrix definition.
 *
 * Failures are reported on stderr; the function still returns normally so
 * existing callers are unaffected.
 */
void rangeCountWrap(const matrix dq, const matrix dx, real *dranges, int *dcounts){
  dim3 block(BLOCK_SIZE, BLOCK_SIZE);
  dim3 grid(1, dq.pr/BLOCK_SIZE);

  rangeCountKernel<<<grid,block>>>(dq, dx, dranges, dcounts);

  /* Peek (rather than Get) so the runtime's error state is left intact for
     any caller that inspects it afterwards.  Note that this can also surface
     an error left pending by an earlier CUDA call. */
  const cudaError_t launchStatus = cudaPeekAtLastError();
  /* cudaDeviceSynchronize() replaces the deprecated cudaThreadSynchronize(). */
  const cudaError_t syncStatus = cudaDeviceSynchronize();
  const cudaError_t status = (launchStatus != cudaSuccess) ? launchStatus : syncStatus;
  if (status != cudaSuccess)
    fprintf(stderr, "rangeCountWrap: CUDA failure: %s\n", cudaGetErrorString(status));
}
52
53
/*NOTE: can be deleted */
/*
 * Host wrapper: launches pruneKernel and blocks until the device is idle.
 * The kernel receives its arguments in the order (dD, dradiiX, dradiiQ, dcM),
 * which differs from this wrapper's own parameter order.
 *
 * Launch layout: BLOCK_SIZE x BLOCK_SIZE threads per block; the grid has
 * dD.pr/BLOCK_SIZE blocks along x and dD.pc/BLOCK_SIZE blocks along y.
 * NOTE(review): rangeSearchWrap maps pc to x and pr to y, i.e. the opposite
 * orientation -- confirm this is what pruneKernel expects.
 * The divisions truncate, so this presumably relies on pr and pc being
 * multiples of BLOCK_SIZE -- TODO confirm against the matrix definition.
 *
 * Failures are reported on stderr; the function still returns normally so
 * existing callers are unaffected.
 */
void pruneWrap(charMatrix dcM, matrix dD, real *dradiiX, real *dradiiQ){
  dim3 block(BLOCK_SIZE, BLOCK_SIZE);
  dim3 grid(dD.pr/BLOCK_SIZE, dD.pc/BLOCK_SIZE);

  pruneKernel<<<grid,block>>>(dD, dradiiX, dradiiQ, dcM);

  /* Peek (rather than Get) so the runtime's error state is left intact for
     any caller that inspects it afterwards.  Note that this can also surface
     an error left pending by an earlier CUDA call. */
  const cudaError_t launchStatus = cudaPeekAtLastError();
  /* cudaDeviceSynchronize() replaces the deprecated cudaThreadSynchronize(). */
  const cudaError_t syncStatus = cudaDeviceSynchronize();
  const cudaError_t status = (launchStatus != cudaSuccess) ? launchStatus : syncStatus;
  if (status != cudaSuccess)
    fprintf(stderr, "pruneWrap: CUDA failure: %s\n", cudaGetErrorString(status));
}
62 #endif