cmake编译cuda
教程中的 CUDA 程序大多直接用 nvcc 编译，但在实际项目中，CUDA 代码往往和 C++ 代码混在一起，因此用 CMake 来组织编译会更方便。
CMake 目前已经能较好地支持 CUDA 编译，本文只列出其中一种方法，仅供参考。
CMakeLists.txt
# Minimal build script for a single-file CUDA program.
cmake_minimum_required(VERSION 3.15)

# Declaring CUDA as a project language makes CMake locate the CUDA compiler
# and build .cu sources natively. This replaces the FindCUDA module
# (find_package(CUDA) / cuda_add_executable), which is deprecated.
project(test LANGUAGES CXX CUDA)

# An ordinary add_executable is enough once CUDA is an enabled language.
# The original empty target_link_libraries(test) call linked nothing and is
# dropped; add a target_link_libraries(test PRIVATE ...) line here if the
# program later needs extra libraries.
add_executable(test test.cu)
test.cu
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <vector>

// Evaluates a CUDA runtime call and terminates the program with the source
// location and the runtime's error string if it did not return cudaSuccess.
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t err_ = (call);                                         \
        if (err_ != cudaSuccess) {                                         \
            std::fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__,       \
                         __LINE__, cudaGetErrorString(err_));              \
            std::exit(EXIT_FAILURE);                                       \
        }                                                                  \
    } while (0)

// Element-wise vector sum: c[i] = a[i] + b[i] for 0 <= i < n.
//
// a, b, c must be device pointers to at least n ints. Expects a 1D launch
// with at least n threads in total; threads whose global index is >= n do
// nothing, so the grid does not have to match n exactly.
__global__ void add(const int* a, const int* b, int* c, int n) {
    int index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index < n) {
        c[index] = a[index] + b[index];
    }
}

// Adds two length-N vectors (all 1s and all 2s) on the GPU and prints each
// element of the result on its own line.
int main(void) {
    const int N = 10;
    const size_t bytes = sizeof(int) * N;

    // Host buffers; std::vector releases them automatically.
    std::vector<int> h_a(N, 1);
    std::vector<int> h_b(N, 2);
    std::vector<int> h_c(N);

    int* d_a = nullptr;
    int* d_b = nullptr;
    int* d_c = nullptr;
    CUDA_CHECK(cudaMalloc(&d_a, bytes));
    CUDA_CHECK(cudaMalloc(&d_b, bytes));
    CUDA_CHECK(cudaMalloc(&d_c, bytes));

    CUDA_CHECK(cudaMemcpy(d_a, h_a.data(), bytes, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(d_b, h_b.data(), bytes, cudaMemcpyHostToDevice));

    // Ceil-div so every element is covered even when N is not a multiple of
    // the block size; the kernel's bounds check handles the surplus threads.
    const int threads = 256;
    const int blocks = (N + threads - 1) / threads;
    add<<<blocks, threads>>>(d_a, d_b, d_c, N);
    CUDA_CHECK(cudaGetLastError());       // launch-configuration errors
    CUDA_CHECK(cudaDeviceSynchronize());  // errors raised while the kernel ran

    CUDA_CHECK(cudaMemcpy(h_c.data(), d_c, bytes, cudaMemcpyDeviceToHost));

    for (int i = 0; i < N; ++i) {
        std::cout << h_c[i] << std::endl;
    }

    CUDA_CHECK(cudaFree(d_a));
    CUDA_CHECK(cudaFree(d_b));
    CUDA_CHECK(cudaFree(d_c));
    return 0;
}
无情的摸鱼机器