OpenCL 操作流程

摘自《OpenCL 编程指南》Page 29

HelloWorld.cpp 中的main()函数会实现或调用一组函数,完成以下操作:

(1) 在第一个可用平台上创建OpenCL上下文

(2)在第一个可用设备上创建命令队列  

(3)加载一个内核文件(HelloWorld.cl)并将它构建到程序对象中

(4)为HelloWorld.cl 中的内核函数hello_kernel()创建一个内核对象

(5)为内核函数的参数(a,b, result)创建内存对象

(6)将待执行的内核排队

(7)将内核结果读回结果缓冲区

 

(1)、(2) 对应:initContext(),

  clGetPlatformIDs()、clGetContextInfo()、clCreateContextFromType()、clCreateCommandQueue()

(3)对应: initPrograms()

clCreateProgramWithSource()、clBuildProgram()

(4)对应:clCreateKernel()(-> kernels.push_back(QCLKernel()))

(5)在程序中创建参数对象,调用 clCreateBuffer等创建内存对象,供内核执行

(6)clSetKernelArg()、clEnqueueNDRangeKernel()(在数据集上分布内核)

(7)clEnqueueReadBuffer() 从内核中读回结果

#include <cl/cl.h>
#include <iostream>
#include <fstream>
#include <sstream>
using namespace std;

const int ARRAY_SIZE = 10;

// Creates an OpenCL context on the first platform reported by
// clGetPlatformIDs(). A GPU context is attempted first; if that fails a CPU
// context is attempted instead.
// Returns the new context, or NULL when no platform is found or neither
// context type can be created.
cl_context createContext()
{
    cl_platform_id platform;
    cl_uint platformCount;

    // 1. Ask for a single platform; platformCount receives how many exist.
    cl_int status = clGetPlatformIDs(1, &platform, &platformCount);
    if (status != CL_SUCCESS || platformCount == 0)
    {
        cerr << "Failed to find any OpenCL platforms." << endl;
        return NULL;
    }

    // 2. Zero-terminated property list binding the context to that platform.
    cl_context_properties properties[] =
    {
        CL_CONTEXT_PLATFORM, (cl_context_properties) platform,
        0
    };

    // Preferred device type: GPU.
    cl_context result = clCreateContextFromType(properties, CL_DEVICE_TYPE_GPU, NULL, NULL, &status);
    if (status == CL_SUCCESS)
    {
        return result;
    }

    // Fallback device type: CPU.
    cout << "Could not create GPU context, trying CPU..." << endl;
    result = clCreateContextFromType(properties, CL_DEVICE_TYPE_CPU, NULL, NULL, &status);
    if (status == CL_SUCCESS)
    {
        return result;
    }

    cerr << "Failed to create an OpenCL GPU or CPU context. ";
    return NULL;
}

// Creates a command queue on the first device attached to `context`.
//
// context: a valid OpenCL context.
// device:  output; receives the id of the device the queue was created on.
//          It is written only on success.
// Returns the new command queue, or NULL on any failure (a message is written
// to cerr). The temporary device list is released on every path.
cl_command_queue createCommandQueue(cl_context context, cl_device_id *device)
{
    cl_int errNum;
    cl_command_queue commandQueue = NULL;
    size_t deviceBufferSize = 0;

    // 1. get the size (in bytes) of the devices buffer
    errNum = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &deviceBufferSize);
    if (errNum != CL_SUCCESS)
    {
        cerr << "Failed call to clGetContextInfo()";
        return NULL;
    }
    // The size is unsigned, so compare against one element rather than "<= 0";
    // anything smaller cannot hold a device id and devices[0] would be invalid.
    if (deviceBufferSize < sizeof(cl_device_id))
    {
        cerr << "No devices available.";
        return NULL;
    }

    // 2. Allocate memory for the device buffer and fetch the device ids
    cl_device_id *devices = new cl_device_id[deviceBufferSize / sizeof(cl_device_id)];
    errNum = clGetContextInfo(context, CL_CONTEXT_DEVICES, deviceBufferSize, devices, NULL);
    if (errNum != CL_SUCCESS)
    {
        delete[] devices;
        cerr << "Failed to get device IDs";
        return NULL;
    }

    // 3. In this example, we just choose the first available device. In a real program, you would
    // likely use all available devices or choose the highest performance device.
    // Copy the id out and free the list right away so no later return can leak it.
    cl_device_id firstDevice = devices[0];
    delete[] devices;

    commandQueue = clCreateCommandQueue(context, firstDevice, 0, NULL);
    if (commandQueue == NULL)
    {
        cerr << "Failed to create commandQueue for device 0";
        return NULL;
    }
    *device = firstDevice;
    return commandQueue;
}

// Loads OpenCL C source from `fileName`, creates a program object from it in
// `context`, and builds it.
//
// context:  context the program is created in.
// device:   device whose build log is reported when the build fails.
// fileName: path of the kernel source file.
// Returns the built program, or NULL if the file cannot be opened, the program
// object cannot be created, or the build fails (the program object is released
// in that last case).
cl_program createProgram(cl_context context, cl_device_id device, const char* fileName)
{
    cl_int errNum;
    cl_program program;

    ifstream kernelFile(fileName, ios::in);
    if (!kernelFile.is_open())
    {
        cerr << "Failed to open file for reading: " << fileName << endl;
        return NULL;
    }
    // Read the whole file into one string.
    ostringstream oss;
    oss << kernelFile.rdbuf();

    string srcStdStr = oss.str();
    const char *srcStr = srcStdStr.c_str();
    program = clCreateProgramWithSource(context, 1, (const char **) &srcStr, NULL, NULL);
    if (program == NULL)
    {
        cerr << "Failed to create CL program for from source.";
        return NULL;
    }
    errNum = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (errNum != CL_SUCCESS)
    {
        // Determine the reason for the error.
        cerr << "Error in kernel: " << endl;

        // Ask for the log size first so the buffer always fits, and print the
        // log only when both queries succeed; an uninitialized buffer is never
        // streamed to cerr.
        size_t logSize = 0;
        errNum = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
        if (errNum == CL_SUCCESS && logSize > 0)
        {
            string buildLog(logSize, '\0');
            errNum = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, logSize, &buildLog[0], NULL);
            if (errNum == CL_SUCCESS)
            {
                // c_str() stops at the first NUL, like the original char-array output.
                cerr << buildLog.c_str();
            }
        }
        clReleaseProgram(program);
        return NULL;
    }
    return program;

}

// Creates the three buffers used as kernel arguments, each holding
// ARRAY_SIZE floats:
//   memObjects[0] - read-only, initialised by copying from `a`
//   memObjects[1] - read-only, initialised by copying from `b`
//   memObjects[2] - read-write, no initial contents
// Returns true when all three buffers exist, false (after logging to cerr)
// when any of them is NULL.
bool createMemObject(cl_context context, cl_mem memObjects[3], float *a, float *b)
{
    const size_t byteCount = sizeof(float) * ARRAY_SIZE;
    const cl_mem_flags inputFlags = CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR;

    memObjects[0] = clCreateBuffer(context, inputFlags, byteCount, a, NULL);
    memObjects[1] = clCreateBuffer(context, inputFlags, byteCount, b, NULL);
    memObjects[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, byteCount, NULL, NULL);

    // All three are always attempted; a single NULL handle is a failure.
    for (int slot = 0; slot < 3; ++slot)
    {
        if (memObjects[slot] == NULL)
        {
            cerr << "Error creating memory objects." << endl;
            return false;
        }
    }
    return true;
}

int main(int argc, char** argv)
{
    cl_context context = 0;
    cl_command_queue commandQueue = 0;
    cl_program program = 0;
    cl_device_id device = 0;
    cl_kernel kernel = 0;
    cl_mem memObjects[3] = {0, 0, 0};
    cl_int errNum;

    // 1.  Context
    context = createContext();
    if (context == NULL)
    {
        cerr << "Failed to create OpenCL context." << endl;
            system("pause");
        return 1;
    }
    // 2. Create a command-queue on the first device available on the created context
    commandQueue = createCommandQueue(context, &device);
    if(commandQueue == NULL)
    {
        // Cleanup();
            system("pause");
        return 1;
    }
    // 3. create OpenCL program from HelloWorld.cl kernel source
    program = createProgram(context, device , "HelloWorld.cl");
    if (program == NULL)
    {
        //Cleanup
            system("pause");
        return 1;
    }
    // 4. Create OpenCL kernel
    kernel = clCreateKernel(program, "hello_kernel", NULL);
    if(kernel == NULL)
    {
        cerr << "Failed to create kernel " << endl;
        // Cleanup();
        system("pause");
        return 1;
    }
    // 5. Create memory objects that will be used as arguments to kernel.
    float result[ARRAY_SIZE];
    float a[ARRAY_SIZE];
    float b[ARRAY_SIZE];
    for (int i = 0; i < ARRAY_SIZE; i++)
    {
        a[i] = i;
        b[i] = i * 2;
    }
    if (!createMemObject(context, memObjects, a, b))
    {
        // Cleanup()
         system("pause");
        return 1;
    }
    // 6. set the kernel arguments (result, a, b)
    errNum = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObjects[0]);
    errNum |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObjects[1]);
    errNum |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObjects[2]);
    if (errNum != CL_SUCCESS)
    {
        cerr << "Error setting kernel arguments." << endl;
        // Cleanup()
        system("pause");
        return 1;
    }
    size_t globalWorkSize[1] = {ARRAY_SIZE};
    size_t localWorkSize[1] = {1};
    // 7. Queue the kernel up for execution across the array
    errNum = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL);
    if (errNum != CL_SUCCESS)
    {
        cerr << "Erro queuing kernel for execution." << endl;
        // Cleanup()
        system("pause");
        return 1;
    }
    // 8. Read the output buffer back to the host
    errNum = clEnqueueReadBuffer(commandQueue, memObjects[2], CL_TRUE, 0, ARRAY_SIZE * sizeof(float), result, 0, NULL, NULL);
    if (errNum != CL_SUCCESS)
    {
        cerr << "Error reading result buffer." << endl;
        //Cleanup();
        system("pause");
        return 1;
    }
    // 9. Output the result buffer
    for (int i = 0; i < ARRAY_SIZE; i++)
    {
        cout << result[i] << " ";
    }
    cout << endl;
    cout << "Exectued successfully. " << endl;
    // Cleanup()
    system("pause");
    return 0;
}

 注意:clCreateBuffer() 执行时只创建了内存对象(initial()),此时需立即执行 clEnqueueNDRangeKernel(),才能完成主机内存中的数据 a、b、result 到显存的拷贝,即这两个调用需位于同一个函数作用域内。否则,需在 clCreateBuffer() 之后立即调用 clEnqueueWriteBuffer()(即需要 read() 一下),手动将内容写入显存,这样就无需立即调用 clEnqueueNDRangeKernel()。

 

posted on 2013-03-30 18:22  没有什么能够阻挡  阅读(967)  评论(0)  编辑  收藏  举报

导航