C++ 使用 CUDA 对图像进行简单处理
CUDA 自带的 sample 使用的是 OpenGL，初次接触时有些难以看懂，于是先改用 OpenCV 做一些简单的处理——学习本来就是循序渐进的。
#include <cstdio>
#include <cstdlib>

// NOTE(review): this listing also relies on the OpenCV headers (Mat, imread,
// imshow, waitKey, Size, CV_8UC1, uchar, uint) with `using namespace cv;`
// being in scope; those lines are not part of the original post — confirm.

// Evaluates a CUDA runtime call and terminates the program with a
// file/line diagnostic if it did not return cudaSuccess.
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t cudaStatus_ = (call);                                  \
        if (cudaStatus_ != cudaSuccess) {                                  \
            std::fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__,       \
                         __LINE__, cudaGetErrorString(cudaStatus_));       \
            std::exit(EXIT_FAILURE);                                       \
        }                                                                  \
    } while (0)

/**
 * Kernel: copies `input` to `output`, forcing every pixel whose column AND
 * row index are both multiples of 4 to 0 (a sparse dot pattern).
 *
 * Launch layout: 2D grid of 2D blocks, one thread per pixel. The grid may
 * overshoot the image; threads outside [0, w) x [0, h) do nothing.
 *
 * @param input   device buffer of w*h values, row-major (one uint per pixel)
 * @param output  device buffer of w*h values, row-major
 * @param w       image width in pixels
 * @param h       image height in pixels
 */
__global__ void chessboard(uint *input, uint *output, int w, int h)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    // The grid is rounded up to whole blocks, so tail threads can fall
    // outside the image; without this guard they access out-of-bounds memory.
    if (x >= w || y >= h)
        return;

    // Widen before multiplying so the flat index cannot overflow int.
    const size_t idx = static_cast<size_t>(y) * w + x;

    if (x % 4 == 0 && y % 4 == 0)
        output[idx] = 0;
    else
        output[idx] = input[idx];
}

/**
 * Loads "0.png" as a grayscale image, shows it, runs the chessboard kernel
 * on the GPU, and shows the result.
 *
 * @return 0 on success, EXIT_FAILURE if the image cannot be loaded or host
 *         memory cannot be allocated (CUDA failures exit via CUDA_CHECK).
 */
int main()
{
    // Flag 0 makes imread load the file as a single-channel grayscale image.
    Mat Img = imread("0.png", 0);
    if (Img.empty()) {
        std::fprintf(stderr, "Failed to load image 0.png\n");
        return EXIT_FAILURE;
    }
    imshow("", Img);
    waitKey(0);

    const int height = Img.rows;
    const int width = Img.cols;
    const size_t numPixels = static_cast<size_t>(height) * width;
    const size_t numBytes = numPixels * sizeof(uint);

    // Host staging buffers: the kernel works on one uint per pixel.
    uint *h_imagein = static_cast<uint *>(std::malloc(numBytes));
    uint *h_imageout = static_cast<uint *>(std::malloc(numBytes));
    if (h_imagein == 0 || h_imageout == 0) {
        std::fprintf(stderr, "Failed to allocate host buffers\n");
        std::free(h_imagein);
        std::free(h_imageout);
        return EXIT_FAILURE;
    }

    uint *d_imagein = 0;
    uint *d_imageout = 0;
    CUDA_CHECK(cudaMalloc((void **)&d_imagein, numBytes));
    CUDA_CHECK(cudaMalloc((void **)&d_imageout, numBytes));

    // Widen the 8-bit pixels into the uint staging buffer row by row
    // (row pointers are used so a non-continuous Mat is handled as well).
    for (int row = 0; row < height; row++) {
        const uchar *src = Img.ptr<uchar>(row);
        uint *dst = h_imagein + static_cast<size_t>(row) * width;
        for (int col = 0; col < width; col++) {
            dst[col] = src[col];
        }
    }

    CUDA_CHECK(cudaMemcpy(d_imagein, h_imagein, numBytes, cudaMemcpyHostToDevice));

    // 16x16 threads per block; ceil-divide so the grid covers the whole image.
    const int blockDimXY = 16;
    dim3 blockSize(blockDimXY, blockDimXY);
    dim3 gridSize((width + blockDimXY - 1) / blockDimXY,
                  (height + blockDimXY - 1) / blockDimXY);
    chessboard<<<gridSize, blockSize>>>(d_imagein, d_imageout, width, height);
    // Kernel launches return nothing; launch errors must be queried explicitly.
    CUDA_CHECK(cudaGetLastError());

    // Blocking copy: waits for the kernel and reports any execution error.
    CUDA_CHECK(cudaMemcpy(h_imageout, d_imageout, numBytes, cudaMemcpyDeviceToHost));

    // Narrow the result back to an 8-bit single-channel image for display.
    Mat putImg(Size(width, height), CV_8UC1);
    for (int row = 0; row < height; row++) {
        uchar *dst = putImg.ptr<uchar>(row);
        const uint *src = h_imageout + static_cast<size_t>(row) * width;
        for (int col = 0; col < width; col++) {
            dst[col] = static_cast<uchar>(src[col]);
        }
    }
    imshow("0", putImg);
    waitKey(0);

    CUDA_CHECK(cudaFree(d_imagein));
    CUDA_CHECK(cudaFree(d_imageout));
    std::free(h_imagein);
    std::free(h_imageout);
    return 0;
}