caught host memory leak
[RBC.git] / oldKernels.cu
1 /* ****** Device helper functions ****** */
2
/*
 * Build a descriptor for one BLOCK_SIZE x BLOCK_SIZE tile of A.
 *
 * row, col : tile coordinates, counted in units of BLOCK_SIZE (not elements).
 *
 * The returned descriptor does not own any storage: its mat pointer aliases
 * A.mat, offset to the first element of the requested tile, and it keeps A's
 * leading dimension so the tile's rows are addressed with the same stride as A.
 * No bounds checking is done on row/col.
 *
 * NOTE(review): only the c, r, ld and mat fields are assigned here; if the
 * matrix type has further fields they are left unset -- confirm callers do
 * not read them.
 */
__device__ matrix getSubMat(matrix A, int row, int col){
  matrix tile;

  // Start of the tile: skip row*BLOCK_SIZE full rows of A (each A.ld elements
  // apart), then col*BLOCK_SIZE elements into that row.
  tile.mat = A.mat + (A.ld*row*BLOCK_SIZE + col*BLOCK_SIZE);
  tile.ld = A.ld;

  // The tile is always a full square block.
  tile.r = BLOCK_SIZE;
  tile.c = BLOCK_SIZE;

  return tile;
}
13
14
15
16 //l_1-norm
17 /* __global__ void dist1Kernel(const matrix Q, const matrix X, matrix D){ */
18 /* int qBlockRow = blockIdx.y; */
19 /* int xBlockRow = blockIdx.x; */
20
21 /* //matrix Dsub = getSubMat(D,xBlockRow,qBlockRow); */
22
23 /* int qID = threadIdx.y; */
24 /* int xID = threadIdx.x; */
25
26 /* // printf("calling (%d,%d) \n",qBlockRow*BLOCK_SIZE+qID,xBlockRow*BLOCK_SIZE+xID); */
27
28 /* int i,j; */
29 /* real ans=0; */
30
31 /* //Note: assumes that X is padded. */
32 /* for(i=0;i<X.pc/BLOCK_SIZE;i++){ */
33 /* // matrix Xs = getSubMat(X,xBlockRow,i); */
34 /* // matrix Qs = getSubMat(Q,qBlockRow,i); */
35
36 /* __shared__ real Xb[BLOCK_SIZE][BLOCK_SIZE]; */
37 /* __shared__ real Qb[BLOCK_SIZE][BLOCK_SIZE]; */
38
39 /* // Each thread loads one element of Xs and Qs into shared mem */
40 /* // Note that the indexing is swapped to increase memory coalescing. */
41 /* //printf("reading x[%d,%d] \n",xBlockRow*BLOCK_SIZE+qID,i*BLOCK_SIZE+xID); */
42 /* Xb[xID][qID]=X.mat[IDX(xBlockRow*BLOCK_SIZE+qID,i*BLOCK_SIZE+xID,X.ld)];// getElement(Xs,qID,xID); */
43 /* //printf("reading q[%d,%d] \n",qBlockRow*BLOCK_SIZE+qID,i*BLOCK_SIZE+xID); */
44 /* Qb[xID][qID]=Q.mat[IDX(qBlockRow*BLOCK_SIZE+qID,i*BLOCK_SIZE+xID,Q.ld)]; */
45
46 /* __syncthreads(); */
47
48 /* for(j=0;j<BLOCK_SIZE;j++) */
49 /* ans+=abs(Xb[j][xID]-Qb[j][qID]); */
50
51 /* __syncthreads(); */
52 /* } */
53
54 /* // Dsub.mat[IDX(qID,xID,Dsub.ld)]=ans;//setElement(Dsub,qID,xID,ans); */
55 /* //printf("writing (%d,%d) %6.2f \n",qBlockRow*BLOCK_SIZE+qID,xBlockRow*BLOCK_SIZE+xID,ans); */
56 /* // Dsub.mat[IDX(qID,xID,Dsub.ld)]=ans; */
57 /* D.mat[IDX(qBlockRow*BLOCK_SIZE+qID,xBlockRow*BLOCK_SIZE+xID,D.ld)]=ans; */
58
59 /* } */