// Example: layered encapsulation (分层分装) — mirroring a host class with a
// POD "device view" struct so it can be passed by value into a CUDA kernel.
#include <iostream>

using namespace std;

// Device-global constant added to every output element in KernelTest.
__device__
int offset = 1;

// POD "device view" of class A: holds only scalars and a device pointer, so
// it is safe to pass by value as a kernel argument (unlike A itself, which
// also owns a host buffer).
struct A_device {
int mH;           // unused in this example
int mW;           // unused in this example
int *m_d_Dynamic; // device buffer, allocated/filled by class A
int mDynamicSize; // size of the buffer in BYTES (sizeof(int) * 4 in A's ctor)
// Device-side accessor, defined below with __device__.
// NOTE(review): the definition carries __device__ but this declaration does
// not — confirm nvcc accepts the mismatch on the target toolchain.
int func( int );
};

// Host-side owner: allocates a host staging buffer plus the matching device
// buffer, and exposes the POD device view `d` for kernel launches.
class A {
public:
A_device d;        // device view passed by value to the kernel
int *m_h_Dynamic;  // host staging buffer mirroring d.m_d_Dynamic

// Initializes both views to an empty state, then allocates the host and
// device buffers (room for 4 ints).
// Fix: the original used a C99 compound literal `(A_device){...}` in the
// initializer list, which is a compiler extension in C++; plain member
// assignment is portable and equivalent here.
A() : m_h_Dynamic(NULL) {
d.mH = 0;
d.mW = 0;
d.m_d_Dynamic = NULL;
d.mDynamicSize = sizeof(int) * 4;
allocMem();
}
~A() { freeMem(); }

// Launches KernelTest over the device buffer and copies MAXN ints back
// into h_sum. Defined below.
void run_device(int* h_sum, size_t MAXN);

// Fills the host staging buffer with test values and uploads it to the
// device buffer so A_device::func() can read it inside the kernel.
void setValue() {
m_h_Dynamic[0] = 1000;
m_h_Dynamic[1] = 10000;
m_h_Dynamic[2] = 100000;
m_h_Dynamic[3] = 1000000;
// Report transfer failures instead of silently ignoring them.
cudaError_t err = cudaMemcpy( d.m_d_Dynamic, m_h_Dynamic, d.mDynamicSize, cudaMemcpyHostToDevice);
if (err != cudaSuccess) {
std::cerr << "setValue: cudaMemcpy H2D failed: " << cudaGetErrorString(err) << std::endl;
}
}

private:
// Non-copyable: A owns raw host/device buffers and frees them in ~A(), so
// the implicit copy would double-free. Declared but not defined
// (pre-C++11 idiom; the file predates C++11 `= delete`).
A(const A&);
A& operator=(const A&);

void freeMem();
void allocMem();
};

// Device-side accessor: selects one of the 4 uploaded values, cycling by
// thread index (i % 4).
__device__
int A_device::func(int i) {
const int slot = i % 4; // buffer holds mDynamicSize bytes == 4 ints
return m_d_Dynamic[slot];
}

// One thread per output element: sum[t] = t + table[t % 4] + offset.
// d_insA is passed BY VALUE, so its embedded device pointer is copied into
// kernel parameter space at launch — the host-only class A never reaches
// the device. Expects a launch of exactly as many threads as sum has slots.
__global__
void KernelTest( int *sum, A_device d_insA) {
const int tid = threadIdx.x;
sum[tid] = tid + d_insA.func(tid) + offset;
}

// Releases the host and device buffers owned by A.
// Fix: null both pointers after freeing so a second call (or a stale
// reference through d) cannot double-free; free(NULL)/cudaFree(NULL) are
// safe no-ops.
__host__
void
A::freeMem() {
free(m_h_Dynamic);
m_h_Dynamic = NULL;
cudaFree(d.m_d_Dynamic);
d.m_d_Dynamic = NULL;
}

// Allocates the host staging buffer and the matching device buffer
// (d.mDynamicSize bytes each, set in the constructor).
// Fix: the original ignored both allocation results; failures are now
// reported and the pointers left NULL instead of garbage.
void
A::allocMem() {
m_h_Dynamic = (int*)malloc(d.mDynamicSize);
if (m_h_Dynamic == NULL) {
std::cerr << "allocMem: host malloc of " << d.mDynamicSize << " bytes failed" << std::endl;
return;
}
cudaError_t err = cudaMalloc( (void**)&d.m_d_Dynamic, d.mDynamicSize);
if (err != cudaSuccess) {
std::cerr << "allocMem: cudaMalloc failed: " << cudaGetErrorString(err) << std::endl;
d.m_d_Dynamic = NULL;
}
}

// Uploads the test values, runs KernelTest, and copies MAXN ints of output
// back into h_sum (caller-owned, must hold >= MAXN ints).
// Fixes: the original leaked d_sum on every call (no cudaFree) and never
// checked for launch errors.
void
A::run_device(
int* h_sum, size_t MAXN) {
setValue();

int *d_sum = NULL;
cudaMalloc( (void**)&d_sum, MAXN * sizeof(int));
cudaMemset( d_sum, 0, MAXN * sizeof(int));

// NOTE(review): launch config is fixed at 1 block x 128 threads, so the
// kernel writes sum[0..127]; MAXN must be >= 128 or this writes out of
// bounds. The only caller in this file passes MAXN == 128.
KernelTest<<<1, 128>>>(d_sum, d);
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
std::cerr << "run_device: kernel launch failed: " << cudaGetErrorString(err) << std::endl;
}

// Blocking D2H copy also synchronizes with the kernel before reading.
cudaMemcpy( h_sum, d_sum, MAXN * sizeof(int), cudaMemcpyDeviceToHost);

cudaFree(d_sum); // fix: was leaked in the original
}

// Driver: constructs an A (which allocates and will free both buffers),
// runs the kernel for 128 elements, and prints the results.
int main( int argc, char **argv) {
const int kCount = 128;

A h_insA;
int h_sum[kCount];

h_insA.run_device(h_sum, kCount);

for (int i = 0; i < kCount; ++i)
std::cout << h_sum[i] << " ";
std::cout << std::endl;

return 0;
}
// Originally posted 2011-04-19 by soulnearby (blog footer converted to a comment).