CUDA---Arrayfire---添加cuda kernel

Arrayfire 添加一维cuda kernel

  1.为了既能利用Arrayfire丰富的函数库,又能保持cuda kernel的灵活性,编程时常常需要在Arrayfire代码中添加自定义的cuda kernel
  2.在arrayfire中添加cuda kernel需要注意几点:
  •  在.cu文件中包含头文件"af/cuda.h" 
    
  •  使用af::eval() 确保所有的JIT kernels已经执行完成
    
  •  使用array::device<T>()获取af::array底层数据的设备指针(调用后该内存被锁定,直到调用unlock()之前Arrayfire不会回收它)
    
  •  获取Arrayfire所使用的CUDA stream(afcu::getStream)
    
  •  设置好参数,在Arrayfire's stream中运行cuda kernel
    
  •  cuda kernel运行完毕后,调用array::unlock()将这块内存的控制权还给af::array
    

示例:

//Arrayfire code 添加一维cuda kernel

#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <stdio.h>


#include <arrayfire.h>
#include <af/cuda.h>

#include <iostream>
#include <cmath>
#include <cuComplex.h>

using namespace af;

typedef cuFloatComplex cufloat;

/**
 * Element-wise complex scaling: c[i] = b * a[i] for every i in [0, size).
 *
 * Launch layout: 1-D grid of 1-D blocks, any number of blocks and threads.
 * Each thread starts at its grid-wide index and advances by the total number
 * of launched threads, so every element is covered regardless of how the
 * launch is configured (a single block of `size` threads still works).
 * No shared memory is used.
 *
 * @param c     Output array, at least `size` elements, device-accessible.
 * @param a     Input array, at least `size` elements, device-accessible.
 *              May be the same pointer as `c`: each element is read and then
 *              written by one thread only.
 * @param b     Complex factor applied to every element.
 * @param size  Number of elements to process; values <= 0 do nothing.
 */
__global__ void ar_cuKernel(cuFloatComplex *c, cuFloatComplex *a, const cuFloatComplex b,int size)
{
	// 64-bit arithmetic so blockIdx.x * blockDim.x and the running index
	// cannot overflow, while a negative `size` still yields zero iterations.
	const long long stride = static_cast<long long>(gridDim.x) * blockDim.x;
	const long long first = static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;

	for (long long i = first; i < size; i += stride)
	{
		c[i] = cuCmulf(b, a[i]);
	}
}

/**
 * Demo: build a small complex af::array, multiply it in place by (1 + 1i)
 * with a hand-written CUDA kernel launched on ArrayFire's stream, and print
 * the array before and after.
 *
 * @return 0 on success, 1 if the kernel launch was rejected by the runtime.
 */
int main()
{
	const cuFloatComplex b = make_cuFloatComplex(1.0f, 1.0f);
	// Element count. The kernel below is launched as ONE block of `num`
	// threads, so `num` must not exceed the device's max threads per block.
	const size_t num = 5;

	af::array x = af::seq(1, num);
	af::array y = af::seq(1, num);
	af::array c = af::complex(x, y);

	// Force evaluation of the pending ArrayFire expression before the raw
	// device pointer is taken.
	c.eval();
	af_print(c);

	// Raw device pointer to c's data, reinterpreted as the CUDA complex type.
	// Ownership is handed back to ArrayFire by c.unlock() below.
	cufloat *d_x = reinterpret_cast<cufloat*>(c.device<af::cfloat>());

	// Look up the CUDA stream ArrayFire uses so the kernel is enqueued on it.
	const int af_id = af::getDevice();
	const int cuda_id = afcu::getNativeId(af_id);
	cudaStream_t af_cuda_stream = afcu::getStream(cuda_id);

	// In-place update: input and output are the same buffer. The element
	// count is derived from `num` instead of being repeated as a literal.
	ar_cuKernel<<<1, static_cast<unsigned int>(num), 0, af_cuda_stream>>>(d_x, d_x, b, static_cast<int>(num));

	// A kernel launch does not return a status; launch-configuration errors
	// are only visible through cudaGetLastError().
	const cudaError_t launch_status = cudaGetLastError();

	// Return control of the buffer to the af::array in every case.
	c.unlock();

	if (launch_status != cudaSuccess)
	{
		fprintf(stderr, "ar_cuKernel launch failed: %s\n", cudaGetErrorString(launch_status));
		return 1;
	}

	af_print(c);
	return 0;
}
posted @ 2020-09-16 00:19  flyingswallow  阅读(338)  评论(0编辑  收藏  举报