1 /* This file is part of the Random Ball Cover (RBC) library.
2 * (C) Copyright 2010, Lawrence Cayton [lcayton@tuebingen.mpg.de]
3 */
5 #ifndef SKERNEL_CU
6 #define SKERNEL_CU
8 #include<stdio.h>
9 #include<math.h>
10 #include "sKernel.h"
11 #include "defs.h"
12 #include "utils.h"
14 __global__ void sumKernel(charMatrix in, intMatrix sum, intMatrix sumaux, unint n){
16 unint bo = blockIdx.x*SCAN_WIDTH; //block offset
17 unint r = blockIdx.y;
18 unint d, t;
20 const unint l=SCAN_WIDTH; //length
22 unint off=1;
24 __shared__ unint ssum[l];
26 ssum[2*id] = (bo+2*id < n) ? in.mat[IDX( r, bo+2*id, in.ld )] : 0;
27 ssum[2*id+1] = (bo+2*id+1 < n) ? in.mat[IDX( r, bo+2*id+1, in.ld)] : 0;
29 //up-sweep
30 for( d=l>>1; d > 0; d>>=1 ){
33 if( id < d ){
34 ssum[ off*(2*id+2)-1 ] += ssum[ off*(2*id+1)-1 ];
35 }
36 off *= 2;
37 }
41 if ( id == 0 ){
42 sumaux.mat[IDX( r, blockIdx.x, sumaux.ld )] = ssum[ l-1 ];
43 ssum[ l-1 ] = 0;
44 }
46 //down-sweep
47 for ( d=1; d<l; d*=2 ){
48 off >>= 1;
51 if( id < d ){
52 t = ssum[ off*(2*id+1)-1 ];
53 ssum[ off*(2*id+1)-1 ] = ssum[ off*(2*id+2)-1 ];
54 ssum[ off*(2*id+2)-1 ] += t;
55 }
56 }
60 if( bo+2*id < n )
61 sum.mat[IDX( r, bo+2*id, sum.ld )] = ssum[2*id];
62 if( bo+2*id+1 < n )
63 sum.mat[IDX( r, bo+2*id+1, sum.ld )] = ssum[2*id+1];
64 }
67 //This is the same as sumKernel, but takes an int matrix as input.
68 __global__ void sumKernelI(intMatrix in, intMatrix sum, intMatrix sumaux, unint n){
70 unint bo = blockIdx.x*SCAN_WIDTH; //block offset
71 unint r = blockIdx.y;
72 unint d, t;
74 const unint l=SCAN_WIDTH; //length
76 unint off=1;
78 __shared__ unint ssum[l];
80 ssum[2*id] = (bo+2*id < n) ? in.mat[IDX( r, bo+2*id, in.ld )] : 0;
81 ssum[2*id+1] = (bo+2*id+1 < n) ? in.mat[IDX( r, bo+2*id+1, in.ld)] : 0;
83 //up-sweep
84 for( d=l>>1; d > 0; d>>=1 ){
87 if( id < d ){
88 ssum[ off*(2*id+2)-1 ] += ssum[ off*(2*id+1)-1 ];
89 }
90 off *= 2;
91 }
95 if ( id == 0 ){
96 sumaux.mat[IDX( r, blockIdx.x, sumaux.ld )] = ssum[ l-1 ];
97 ssum[ l-1 ] = 0;
98 }
100 //down-sweep
101 for ( d=1; d<l; d*=2 ){
102 off >>= 1;
105 if( id < d ){
106 t = ssum[ off*(2*id+1)-1 ];
107 ssum[ off*(2*id+1)-1 ] = ssum[ off*(2*id+2)-1 ];
108 ssum[ off*(2*id+2)-1 ] += t;
109 }
110 }
114 if( bo+2*id < n )
115 sum.mat[IDX( r, bo+2*id, sum.ld )] = ssum[2*id];
117 if( bo+2*id+1 < n )
118 sum.mat[IDX( r, bo+2*id+1, sum.ld )] = ssum[2*id+1];
119 }
123 __global__ void combineSumKernel(intMatrix sum, unint numDone, intMatrix daux, unint n){
125 unint bo = blockIdx.x * SCAN_WIDTH;
126 unint r = blockIdx.y+numDone;
128 if(bo+2*id < n)
129 sum.mat[IDX( r, bo+2*id, sum.ld )] += daux.mat[IDX( r, blockIdx.x, daux.ld )];
130 if(bo+2*id+1 < n)
131 sum.mat[IDX( r, bo+2*id+1, sum.ld )] += daux.mat[IDX( r, blockIdx.x, daux.ld )];
133 }
136 __global__ void getCountsKernel(unint *counts, unint numDone, charMatrix ir, intMatrix sums){
137 unint r = blockIdx.x*BLOCK_SIZE + threadIdx.x + numDone;
138 if ( r < ir.r ){
139 counts[r] = ir.mat[IDX( r, ir.c-1, ir.ld )] ? sums.mat[IDX( r, sums.c-1, sums.ld )]+1 : sums.mat[IDX( r, sums.c-1, sums.ld )];
140 }
141 }
144 __global__ void buildMapKernel(intMatrix map, charMatrix ir, intMatrix sums, unint offSet){