Stable release of RBC
[RBC.git] / utilsGPU.cu
1 #ifndef UTILSGPU_CU
2 #define UTILSGPU_CU
3
4 #include<cuda.h>
5 #include<stdio.h>
6 #include "defs.h"
7
/*
 * createMemPlan
 *
 * Splits a job over nPts points into segments small enough to fit in the
 * currently free device memory.
 *
 *   nPts      number of points to process
 *   memPerPt  bytes of device memory needed per point
 *
 * Returns a memPlan with
 *   numComputeSegs  number of segments,
 *   normSegSize     size of every segment but the last,
 *   lastSegSize     size of the final segment.
 *
 * Only the fraction MEM_USABLE of the free memory is budgeted.  PAD, DPAD and
 * MEM_USABLE come from defs.h; presumably PAD rounds up and DPAD rounds down
 * to the padding granularity -- TODO confirm against defs.h.
 *
 * If the arguments are not positive, the memory query fails, or not even one
 * padded group of points fits, a message is written to stderr and a plan with
 * all three fields set to 0 is returned (the previous code divided by zero in
 * these cases).
 */
memPlan createMemPlan(int nPts, int memPerPt){
  memPlan mp;
  size_t memFree = 0, memTot = 0;
  size_t maxPts, ptsAtOnce, need;
  cudaError_t err;

  // Empty plan, returned unchanged on every failure path.
  mp.numComputeSegs = 0;
  mp.normSegSize = 0;
  mp.lastSegSize = 0;

  if( nPts <= 0 || memPerPt <= 0 ){
    fprintf(stderr, "createMemPlan: nothing to plan (nPts = %d, memPerPt = %d)\n", nPts, memPerPt);
    return mp;
  }

  // The memory query reports byte counts as size_t; 32-bit variables would
  // wrap on devices with more than 4GB.
  err = cudaMemGetInfo(&memFree, &memTot);
  if( err != cudaSuccess ){
    fprintf(stderr, "createMemPlan: cudaMemGetInfo failed: %s\n", cudaGetErrorString(err));
    return mp;
  }

  // Scale in double and keep the result in size_t so it cannot overflow int.
  memFree = (size_t)(((double)memFree)*MEM_USABLE);
  printf("memfree = %llu \n", (unsigned long long)memFree);

  maxPts = memFree/(size_t)memPerPt;
  ptsAtOnce = DPAD(maxPts); //max number of pts that can be processed at once
  printf("ptsAtOnce = %llu \n", (unsigned long long)ptsAtOnce);

  if( ptsAtOnce == 0 ){
    fprintf(stderr, "createMemPlan: not enough free device memory for %d bytes per point\n", memPerPt);
    return mp;
  }

  // Ceiling division; the quotient is at most nPts, so it fits in an int.
  need = (size_t)nPts;
  mp.numComputeSegs = (int)(need/ptsAtOnce + ((need%ptsAtOnce==0) ? 0 : 1));
  mp.normSegSize = PAD(nPts/mp.numComputeSegs);
  mp.lastSegSize = PAD(nPts) - mp.normSegSize*(mp.numComputeSegs-1);
  //Note that lastSegSize is automatically padded if nPts is.
  return mp;
}
24
/*
 * copyAndMove
 *
 * Builds a device-side copy of the host matrix x in dx: the dimension fields
 * (r, c, pr, pc, ld) are copied, a device buffer of pr*pc elements is
 * allocated into dx->mat, and x->mat is uploaded into it.
 *
 *   dx  destination descriptor; dx->mat is overwritten with the new device
 *       pointer
 *   x   source descriptor; x->mat is read as host memory of pr*pc elements
 *
 * On allocation or copy failure a message is written to stderr, any buffer
 * allocated here is released, and dx->mat is set to NULL so the caller can
 * detect the failure.
 */
void copyAndMove(matrix *dx, const matrix *x){
  size_t nBytes;
  cudaError_t err;

  dx->r = x->r;
  dx->c = x->c;
  dx->pr = x->pr;
  dx->pc = x->pc;
  dx->ld = x->ld;

  // Widen each factor before multiplying so the element count is computed in
  // size_t rather than in the (possibly narrower) type of pr and pc.
  nBytes = (size_t)dx->pr * (size_t)dx->pc * sizeof(*(dx->mat));

  err = cudaMalloc( (void**)&(dx->mat), nBytes );
  if( err != cudaSuccess ){
    fprintf(stderr, "copyAndMove: cudaMalloc of %llu bytes failed: %s\n",
            (unsigned long long)nBytes, cudaGetErrorString(err));
    dx->mat = NULL;
    return;
  }

  err = cudaMemcpy( dx->mat, x->mat, nBytes, cudaMemcpyHostToDevice );
  if( err != cudaSuccess ){
    fprintf(stderr, "copyAndMove: cudaMemcpy of %llu bytes failed: %s\n",
            (unsigned long long)nBytes, cudaGetErrorString(err));
    cudaFree( dx->mat ); // do not leak the buffer that could not be filled
    dx->mat = NULL;
  }
}
36
37
/*
 * copyAndMoveI
 *
 * Builds a device-side copy of the host intMatrix x in dx: the dimension
 * fields (r, c, pr, pc, ld) are copied, a device buffer of pr*pc elements is
 * allocated into dx->mat, and x->mat is uploaded into it.
 *
 *   dx  destination descriptor; dx->mat is overwritten with the new device
 *       pointer
 *   x   source descriptor; x->mat is read as host memory of pr*pc elements
 *
 * On allocation or copy failure a message is written to stderr, any buffer
 * allocated here is released, and dx->mat is set to NULL so the caller can
 * detect the failure.
 */
void copyAndMoveI(intMatrix *dx, const intMatrix *x){
  size_t nBytes;
  cudaError_t err;

  dx->r = x->r;
  dx->c = x->c;
  dx->pr = x->pr;
  dx->pc = x->pc;
  dx->ld = x->ld;

  // Widen each factor before multiplying so the element count is computed in
  // size_t rather than in the (possibly narrower) type of pr and pc.
  nBytes = (size_t)dx->pr * (size_t)dx->pc * sizeof(*(dx->mat));

  err = cudaMalloc( (void**)&(dx->mat), nBytes );
  if( err != cudaSuccess ){
    fprintf(stderr, "copyAndMoveI: cudaMalloc of %llu bytes failed: %s\n",
            (unsigned long long)nBytes, cudaGetErrorString(err));
    dx->mat = NULL;
    return;
  }

  err = cudaMemcpy( dx->mat, x->mat, nBytes, cudaMemcpyHostToDevice );
  if( err != cudaSuccess ){
    fprintf(stderr, "copyAndMoveI: cudaMemcpy of %llu bytes failed: %s\n",
            (unsigned long long)nBytes, cudaGetErrorString(err));
    cudaFree( dx->mat ); // do not leak the buffer that could not be filled
    dx->mat = NULL;
  }
}
49
50
/*
 * copyAndMoveC
 *
 * Builds a device-side copy of the host charMatrix x in dx: the dimension
 * fields (r, c, pr, pc, ld) are copied, a device buffer of pr*pc elements is
 * allocated into dx->mat, and x->mat is uploaded into it.
 *
 *   dx  destination descriptor; dx->mat is overwritten with the new device
 *       pointer
 *   x   source descriptor; x->mat is read as host memory of pr*pc elements
 *
 * On allocation or copy failure a message is written to stderr, any buffer
 * allocated here is released, and dx->mat is set to NULL so the caller can
 * detect the failure.
 */
void copyAndMoveC(charMatrix *dx, const charMatrix *x){
  size_t nBytes;
  cudaError_t err;

  dx->r = x->r;
  dx->c = x->c;
  dx->pr = x->pr;
  dx->pc = x->pc;
  dx->ld = x->ld;

  // Widen each factor before multiplying so the element count is computed in
  // size_t rather than in the (possibly narrower) type of pr and pc.
  nBytes = (size_t)dx->pr * (size_t)dx->pc * sizeof(*(dx->mat));

  err = cudaMalloc( (void**)&(dx->mat), nBytes );
  if( err != cudaSuccess ){
    fprintf(stderr, "copyAndMoveC: cudaMalloc of %llu bytes failed: %s\n",
            (unsigned long long)nBytes, cudaGetErrorString(err));
    dx->mat = NULL;
    return;
  }

  err = cudaMemcpy( dx->mat, x->mat, nBytes, cudaMemcpyHostToDevice );
  if( err != cudaSuccess ){
    fprintf(stderr, "copyAndMoveC: cudaMemcpy of %llu bytes failed: %s\n",
            (unsigned long long)nBytes, cudaGetErrorString(err));
    cudaFree( dx->mat ); // do not leak the buffer that could not be filled
    dx->mat = NULL;
  }
}
62 #endif