C++ 使用 CUDA 对图像进行简单处理

CUDA 自带的 sample 使用 OpenGL 来显示结果，初次接触时有些看不懂，于是先用 OpenCV 代替它完成图像的读取与显示，只做简单的处理——学习本来就是循序渐进的。

// Kernel: copies `input` to `output`, except that every pixel whose x and y
// coordinates are both multiples of 4 is set to 0.
//
// input  - source pixels, one uint per pixel, row-major, w elements per row
// output - destination buffer with the same layout as `input`
// w, h   - image width and height in pixels
//
// Expects a 2D launch in which each thread handles one pixel:
// x is derived from the .x grid/block indices and y from the .y ones.
// The grid may overshoot the image; threads outside w x h do nothing.
__global__ void chessboard(uint* input, uint *output, int w, int h)
{
    int x = blockIdx.x*blockDim.x + threadIdx.x;
    int y = blockIdx.y*blockDim.y + threadIdx.y;

    // The grid is normally rounded up to whole blocks, so threads in the last
    // column/row of blocks can fall outside the image and must not touch memory.
    if (x >= w || y >= h)
        return;

    // Widen before multiplying so the flat index cannot overflow int.
    size_t idx = (size_t)y * w + x;

    if (x % 4 == 0 && y % 4 == 0)
        output[idx] = 0;
    else
        output[idx] = input[idx];
}

// Loads "0.png" as a grayscale image, shows it, runs the chessboard kernel on
// it on the GPU, and shows the processed result in a second window.
//
// Returns 0 on success, 1 if the image could not be loaded, 2 if a host
// allocation failed, and 3 if any CUDA call or the kernel launch failed.
int main()
{
    Mat Img = imread("0.png", 0);   // flag 0: load as single-channel grayscale
    if (Img.empty())
        return 1;                   // missing/unreadable file: nothing to show or process
    imshow("", Img);
    waitKey(0);

    const int height = Img.rows;
    const int width = Img.cols;
    const size_t bytes = sizeof(uint) * height * width;

    uint *h_imagein = (uint *)malloc(bytes);
    uint *h_imageout = (uint *)malloc(bytes);
    uint *d_imagein = 0;
    uint *d_imageout = 0;
    int status = 0;

    if (!h_imagein || !h_imageout)
        status = 2;

    if (status == 0)
    {
        // Widen the 8-bit pixels to one uint each, the layout the kernel reads.
        for (int i = 0; i < height; i++)
        {
            const uchar *row = Img.ptr<uchar>(i);
            for (int j = 0; j < width; j++)
            {
                h_imagein[(size_t)i * width + j] = row[j];
            }
        }

        bool ok = cudaMalloc((void**)&d_imagein, bytes) == cudaSuccess
               && cudaMalloc((void**)&d_imageout, bytes) == cudaSuccess
               && cudaMemcpy(d_imagein, h_imagein, bytes, cudaMemcpyHostToDevice) == cudaSuccess;

        if (ok)
        {
            // One thread per pixel, 16x16 threads per block, grid rounded up
            // so that it covers the whole image.
            dim3 blockSize(16, 16);
            dim3 gridSize((width + 16 - 1) / 16, (height + 16 - 1) / 16);
            chessboard << < gridSize, blockSize >> >(d_imagein, d_imageout, width, height);

            // A launch does not return an error itself: query it explicitly.
            // The blocking device-to-host copy then waits for the kernel and
            // reports any error raised while it was executing.
            ok = cudaGetLastError() == cudaSuccess
              && cudaMemcpy(h_imageout, d_imageout, bytes, cudaMemcpyDeviceToHost) == cudaSuccess;
        }

        if (!ok)
            status = 3;
    }

    if (status == 0)
    {
        // Narrow the result back to an 8-bit single-channel image for display.
        Mat putImg(Size(width, height), CV_8UC1);
        for (int i = 0; i < height; i++)
        {
            uchar *row = putImg.ptr<uchar>(i);
            for (int j = 0; j < width; j++)
            {
                row[j] = (uchar)h_imageout[(size_t)i * width + j];
            }
        }
        imshow("0", putImg);
        waitKey(0);
    }

    // Release everything on every path; freeing a null pointer is a no-op
    // for both cudaFree and free.
    cudaFree(d_imagein);
    cudaFree(d_imageout);
    free(h_imagein);
    free(h_imageout);
    return status;
}

posted @ 2018-02-07 11:02  aote369  阅读(5801)  评论(0编辑  收藏  举报