CUDA---Arrayfire---添加cuda kernel
Arrayfire 添加一维cuda kernel
1. 为了既能利用 ArrayFire 丰富的函数库,又能保持 CUDA kernel 的灵活性,编程时常常需要在 ArrayFire 代码中加入自定义的 CUDA kernel。
2.在arrayfire中添加cuda kernel需要注意几点:
- 在 .cu 文件中包含头文件 "af/cuda.h"
- 使用 array::eval()(或 af::eval())确保所有的 JIT kernels 已经执行完成
- 使用 array::device<T>() 获取 af::array 底层的设备指针(此后这块内存由用户代码接管)
- 通过 afcu::getStream() 获取 ArrayFire 使用的 CUDA stream
- 设置好启动参数,在 ArrayFire 的 stream 中运行 CUDA kernel
- CUDA kernel 启动之后,调用 array::unlock() 将这块内存的控制权还给 af::array
示例:
// ArrayFire example: launching a custom one-dimensional CUDA kernel
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <arrayfire.h>
#include <af/cuda.h>
#include <iostream>
#include <cmath>
#include <cuComplex.h>
using namespace af;
typedef cuFloatComplex cufloat;
/**
 * Multiplies each of the first `size` elements of `a` by the complex
 * constant `b` and stores the products in `c`:  c[i] = b * a[i].
 *
 * Launch layout: 1-D grid of 1-D blocks. Any grid/block size is valid,
 * because every thread starts at its global index and advances by the total
 * number of launched threads until it passes `size`. (The previous version
 * indexed with threadIdx.x alone and was therefore only correct for a
 * single-block launch.)
 *
 * No shared memory is used. `c` and `a` may be the same pointer (in-place
 * update): each element is read and then written by one thread only.
 *
 * @param c    device pointer to the output array (at least `size` elements)
 * @param a    device pointer to the input array (at least `size` elements)
 * @param b    complex scale factor, passed by value
 * @param size number of elements to process; values <= 0 do nothing
 */
__global__ void ar_cuKernel(cuFloatComplex *c, cuFloatComplex *a, const cuFloatComplex b,int size)
{
    // 64-bit index arithmetic so blockIdx.x * blockDim.x and the stride
    // increment cannot overflow for large launches.
    const long long stride = static_cast<long long>(gridDim.x) * blockDim.x;
    for (long long i = static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;
         i < size;
         i += stride)
    {
        c[i] = cuCmulf(b, a[i]);
    }
}
/**
 * Demonstrates running a hand-written CUDA kernel on memory owned by an
 * af::array:
 *   1. build a complex array with ArrayFire and force its evaluation,
 *   2. take the raw device pointer with array::device(),
 *   3. launch the kernel on the stream obtained from afcu::getStream(),
 *   4. hand the memory back with array::unlock() and print the result.
 *
 * Returns 0 on success, 1 if the kernel launch is rejected or ArrayFire
 * throws an af::exception.
 */
int main()
{
    try
    {
        const cuFloatComplex b = make_cuFloatComplex(1.0f, 1.0f);
        const size_t num = 5;

        // c = (1 + 1i, 2 + 2i, ..., num + num*i)
        af::array x = af::seq(1, num);
        af::array y = af::seq(1, num);
        af::array c = af::complex(x, y);

        // Force pending JIT work for c to run before its buffer is handed to
        // the custom kernel.
        c.eval();
        af_print(c);

        // af::cfloat is reinterpreted as cuFloatComplex so the kernel can use
        // the cuComplex helpers. The buffer stays under our control until
        // c.unlock() is called.
        cufloat *d_x = reinterpret_cast<cufloat*>(c.device<af::cfloat>());

        int af_id = af::getDevice();
        int cuda_id = afcu::getNativeId(af_id);
        // NOTE(review): this passes the native CUDA id to afcu::getStream();
        // the af/cuda.h parameter description appears to ask for the
        // ArrayFire device id. The two may differ on multi-GPU machines --
        // confirm against the ArrayFire version in use.
        cudaStream_t af_cuda_stream = afcu::getStream(cuda_id);

        // One block of `num` threads is enough for this small example. The
        // element count is passed as `num` (previously a hard-coded 5) so it
        // cannot drift from the array size. The update is done in place:
        // d_x is both input and output.
        ar_cuKernel<<<1, static_cast<unsigned int>(num), 0, af_cuda_stream>>>(
            d_x, d_x, b, static_cast<int>(num));

        // A launch does not report errors by itself; query the launch status.
        const cudaError_t launch_status = cudaGetLastError();

        // Return ownership of the buffer to ArrayFire whether or not the
        // launch succeeded.
        c.unlock();

        if (launch_status != cudaSuccess)
        {
            fprintf(stderr, "ar_cuKernel launch failed: %s\n",
                    cudaGetErrorString(launch_status));
            return 1;
        }

        af_print(c);
    }
    catch (const af::exception &e)
    {
        fprintf(stderr, "ArrayFire error: %s\n", e.what());
        return 1;
    }
    return 0;
}