cuda加速--第一个例子
#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <stdio.h>
#include <stdlib.h>

cudaError_t addWithCuda(int *c, const int *a, const int *b, unsigned int size);

/*
 * Element-wise vector add: c[i] = a[i] + b[i].
 *
 * Launch layout: exactly ONE block with one thread per element
 * (addKernel<<<1, n>>>). The kernel indexes with threadIdx.x only and has
 * no bounds check, so the caller must launch exactly as many threads as
 * there are elements. Retained with its original signature for existing
 * callers; addWithCuda uses addKernelChecked instead.
 */
__global__ void addKernel(int *c, const int *a, const int *b)
{
    int i = threadIdx.x;
    c[i] = a[i] + b[i];
}

/*
 * Bounds-checked element-wise vector add: c[i] = a[i] + b[i] for i in [0, n).
 *
 * Launch layout: any 1D grid of 1D blocks. Each thread starts at its flat
 * global index and advances by the total number of launched threads, so the
 * result is correct for every grid/block size and threads past the end of
 * the data do nothing. Uses no shared memory.
 */
__global__ void addKernelChecked(int *c, const int *a, const int *b, unsigned int n)
{
    // size_t arithmetic avoids overflow in blockIdx.x * blockDim.x.
    const size_t stride = (size_t)gridDim.x * blockDim.x;
    for (size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x; i < n; i += stride) {
        c[i] = a[i] + b[i];
    }
}

/*
 * Adds two fixed 5-element vectors on the GPU and prints the result.
 * Returns 0 on success, 1 if the addition or the device reset fails.
 */
int main()
{
    const int arraySize = 5;
    const int a[arraySize] = { 1, 2, 3, 4, 5 };
    const int b[arraySize] = { 10, 20, 30, 40, 50 };
    int c[arraySize] = { 0 };

    // Add vectors in parallel.
    cudaError_t cudaStatus = addWithCuda(c, a, b, arraySize);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "addWithCuda failed!");
        return 1;
    }

    printf("{1,2,3,4,5} + {10,20,30,40,50} = {%d,%d,%d,%d,%d}\n",
           c[0], c[1], c[2], c[3], c[4]);

    // cudaDeviceReset must be called before exiting in order for profiling and
    // tracing tools such as Nsight and Visual Profiler to show complete traces.
    cudaStatus = cudaDeviceReset();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaDeviceReset failed!");
        return 1;
    }

#ifdef _WIN32
    // "pause" is a Windows shell built-in; keep the console window open there only.
    system("pause");
#endif
    return 0;
}

/*
 * Helper function for using CUDA to add vectors in parallel.
 *
 * Parameters:
 *   c    - host output buffer with room for `size` ints
 *   a, b - host input buffers holding `size` ints each
 *   size - number of elements; 0 is accepted and leaves `c` untouched
 *
 * Returns cudaSuccess on success, otherwise the first CUDA error encountered
 * (a message is also written to stderr). Device buffers are always released
 * before returning.
 */
cudaError_t addWithCuda(int *c, const int *a, const int *b, unsigned int size)
{
    // Every local is declared before the first `goto Error`: C++ forbids
    // jumping over an initialized declaration.
    int *dev_a = 0;
    int *dev_b = 0;
    int *dev_c = 0;
    const unsigned int threadsPerBlock = 256;
    // Ceil-division done in size_t so `size + threadsPerBlock - 1` cannot wrap.
    const unsigned int numBlocks =
        (unsigned int)(((size_t)size + threadsPerBlock - 1) / threadsPerBlock);
    const size_t bytes = (size_t)size * sizeof(int);
    cudaError_t cudaStatus;

    // Choose which GPU to run on, change this on a multi-GPU system.
    cudaStatus = cudaSetDevice(0);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaSetDevice failed!  Do you have a CUDA-capable GPU installed?");
        goto Error;
    }

    // Nothing to add: a launch with zero blocks would be rejected as an invalid
    // configuration, so report success without touching the device further.
    if (size == 0) {
        goto Error;
    }

    // Allocate GPU buffers for three vectors (two input, one output).
    cudaStatus = cudaMalloc((void **)&dev_c, bytes);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed!");
        goto Error;
    }

    cudaStatus = cudaMalloc((void **)&dev_a, bytes);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed!");
        goto Error;
    }

    cudaStatus = cudaMalloc((void **)&dev_b, bytes);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed!");
        goto Error;
    }

    // Copy input vectors from host memory to GPU buffers.
    cudaStatus = cudaMemcpy(dev_a, a, bytes, cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!");
        goto Error;
    }

    cudaStatus = cudaMemcpy(dev_b, b, bytes, cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!");
        goto Error;
    }

    // Launch enough blocks to cover every element; the kernel bounds-checks
    // the tail, so `size` need not be a multiple of the block size.
    addKernelChecked<<<numBlocks, threadsPerBlock>>>(dev_c, dev_a, dev_b, size);

    // Check for any errors launching the kernel.
    cudaStatus = cudaGetLastError();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
        goto Error;
    }

    // cudaDeviceSynchronize waits for the kernel to finish, and returns
    // any errors encountered during the launch.
    cudaStatus = cudaDeviceSynchronize();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching addKernel!\n", cudaStatus);
        goto Error;
    }

    // Copy output vector from GPU buffer to host memory.
    cudaStatus = cudaMemcpy(c, dev_c, bytes, cudaMemcpyDeviceToHost);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!");
        goto Error;
    }

Error:
    // Shared cleanup for success and failure; freeing a null pointer is a no-op.
    cudaFree(dev_c);
    cudaFree(dev_a);
    cudaFree(dev_b);

    return cudaStatus;
}
分类:
CUDA编程+GPU
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 阿里最新开源QwQ-32B,效果媲美deepseek-r1满血版,部署成本又又又降低了!
· 开源Multi-agent AI智能体框架aevatar.ai,欢迎大家贡献代码
· Manus重磅发布:全球首款通用AI代理技术深度解析与实战指南
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧
2021-01-25 使用最新目标跟踪框mmtracking实现自己的目标跟踪项目
2021-01-25 终端常用命令:----------------------->>服务器挂载
2021-01-25 我用YOLO-V5实现行人社交距离风险提示,代码开源!
2021-01-25 在win10、Ubuntu双系统下,卸载Ubuntu
2021-01-25 ubuntu安装并切换多个版本的cuda