简单的 CUDA 应用模板,白送的 Sample。
▶ 源代码
// template_cpu.cpp
// Host-side reference implementation used to validate the GPU result.
extern "C" void computeGold(float *, const unsigned int);

// Multiplies each of the first `len` elements of `idata` in place by `len`
// (converted to float).
//
//   idata  buffer to scale; it is overwritten with the result
//   len    number of elements to process, and also the scale factor
void computeGold(float *idata, const unsigned int len)
{
    const float scale = static_cast<float>(len);
    float *const last = idata + len;

    // Walk the buffer by pointer; a zero length performs no writes.
    for (float *cur = idata; cur != last; ++cur)
    {
        *cur = *cur * scale;
    }
}
// template.cu
// Minimal CUDA application template: copies a small array to the device,
// scales it in a kernel, copies it back, and compares it with a CPU reference.
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include "device_launch_parameters.h"
#include <helper_functions.h>

// Evaluates a CUDA runtime call and terminates the program with the source
// location and the runtime's error string if the call did not succeed.
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t cuda_check_err_ = (call);                              \
        if (cuda_check_err_ != cudaSuccess) {                              \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                    cudaGetErrorString(cuda_check_err_));                  \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)

// CPU reference implementation (defined in template_cpu.cpp).
extern "C" void computeGold(float *, const unsigned int);

// Scales the input by the block width: g_odata[t] = blockDim.x * g_idata[t],
// staged through dynamic shared memory.
//
// Launch requirements:
//   - Indexing uses threadIdx.x only, so the kernel is meant to be launched as
//     a single 1-D block; extra blocks would all touch the same elements.
//   - Dynamic shared memory must hold at least blockDim.x floats.
//   - g_idata and g_odata must each hold at least blockDim.x floats, because
//     there is no bounds check.
__global__ void testKernel(float *g_idata, float *g_odata)
{
    extern __shared__ float sdata[];
    const unsigned int tid = threadIdx.x;

    // Stage the input in shared memory.
    sdata[tid] = g_idata[tid];
    __syncthreads();

    // Each thread scales its own element by the number of threads in the block.
    sdata[tid] = (float)blockDim.x * sdata[tid];
    __syncthreads();

    // Write the result back to global memory.
    g_odata[tid] = sdata[tid];
}

// Runs the template: allocates buffers, launches testKernel on one block of
// 32 threads, prints the elapsed time, and prints whether the device output
// matches computeGold. Any CUDA runtime failure terminates the program via
// CUDA_CHECK; a failed host allocation returns EXIT_FAILURE.
int main()
{
    printf("\n\tStart.\n");

    CUDA_CHECK(cudaSetDevice(0));
    StopWatchInterface *timer = 0;
    sdkCreateTimer(&timer);
    sdkStartTimer(&timer);

    const unsigned int num_threads = 32;
    const size_t mem_size = sizeof(float) * num_threads;

    // Host buffers: verify the allocations before touching them.
    float *h_idata = (float *)malloc(mem_size);
    float *h_odata = (float *)malloc(mem_size);
    if (h_idata == NULL || h_odata == NULL)
    {
        fprintf(stderr, "Host allocation of %u bytes failed\n", (unsigned int)mem_size);
        free(h_idata);
        free(h_odata);
        return EXIT_FAILURE;
    }

    // Device buffers.
    float *d_idata = NULL;
    float *d_odata = NULL;
    CUDA_CHECK(cudaMalloc((void **)&d_idata, mem_size));
    CUDA_CHECK(cudaMalloc((void **)&d_odata, mem_size));

    // Input is 0, 1, 2, ..., num_threads - 1.
    for (unsigned int i = 0; i < num_threads; ++i)
        h_idata[i] = (float)i;
    CUDA_CHECK(cudaMemcpy(d_idata, h_idata, mem_size, cudaMemcpyHostToDevice));

    // One block of num_threads threads; mem_size bytes of dynamic shared memory.
    testKernel<<<dim3(1, 1, 1), dim3(num_threads, 1, 1), mem_size>>>(d_idata, d_odata);
    // Check the result of the kernel launch (invalid configuration etc.).
    CUDA_CHECK(cudaGetLastError());
    // Synchronize before copying so an execution fault is reported here rather
    // than being attributed to the following cudaMemcpy.
    CUDA_CHECK(cudaDeviceSynchronize());
    CUDA_CHECK(cudaMemcpy(h_odata, d_odata, mem_size, cudaMemcpyDeviceToHost));

    sdkStopTimer(&timer);
    printf("\n\tProcessing time: %f ms\n", sdkGetTimerValue(&timer));
    sdkDeleteTimer(&timer);

    // Compute the CPU reference in place in h_idata, then compare it against
    // the device output with zero epsilon/threshold arguments.
    computeGold(h_idata, num_threads);
    printf("\n\tFinish, return %s.\n", compareData(h_idata, h_odata, num_threads, 0.0f, 0.0f) ? "Passed" : "Failed");

    free(h_idata);
    free(h_odata);
    CUDA_CHECK(cudaFree(d_idata));
    CUDA_CHECK(cudaFree(d_odata));
    getchar();  // keep the console window open until a key is pressed
    return 0;
}
▶ 输出结果:
Start. Processing time: 101.169357 ms Finish, return Passed.
▶ 涨姿势:没有