// CUDA 中二维 FFT 的使用：cufftExecC2C (2D FFT in CUDA using cufftExecC2C)
// Demo: forward + inverse 2D complex-to-complex FFT with cuFFT, done in place
// on the device, followed by a scaling kernel that undoes cuFFT's missing
// normalisation so the printed values match the original input.

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <iostream>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <cufft.h>
#include <opencv.hpp>

#define NX 3     // number of columns (fastest-varying / contiguous dimension)
#define NY 5     // number of rows (slowest-varying dimension)
#define BATCH 1  // number of independent 2D transforms stored back to back
#define NRANK 2  // transform rank (2D)

// Abort with a readable message when a CUDA runtime call fails.
#define CUDA_CHECK(call)                                                     \
    do {                                                                     \
        cudaError_t cudaStatus_ = (call);                                    \
        if (cudaStatus_ != cudaSuccess) {                                    \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,    \
                    cudaGetErrorString(cudaStatus_));                        \
            exit(EXIT_FAILURE);                                              \
        }                                                                    \
    } while (0)

// Abort when a cuFFT call fails. cuFFT has no error-string helper in its
// public API, so the numeric cufftResult code is printed.
#define CUFFT_CHECK(call)                                                    \
    do {                                                                     \
        cufftResult cufftStatus_ = (call);                                   \
        if (cufftStatus_ != CUFFT_SUCCESS) {                                 \
            fprintf(stderr, "cuFFT error %s:%d: code %d\n", __FILE__,        \
                    __LINE__, (int)cufftStatus_);                            \
            exit(EXIT_FAILURE);                                              \
        }                                                                    \
    } while (0)

using namespace cv;
using std::cout;
using std::endl;

// Multiplies every complex element of idata by `scale` and stores it in odata.
//
// Launch layout: 1D grid of 1D blocks; any configuration whose total thread
// count is >= size is valid (threads past the end do nothing).
// idata and odata may alias (the caller below runs it in place), so neither
// pointer is marked __restrict__.
static __global__ void cufftComplexScale(cufftComplex *idata, cufftComplex *odata, const int size, float scale)
{
    const int threadID = blockIdx.x * blockDim.x + threadIdx.x;
    if (threadID < size)
    {
        odata[threadID].x = idata[threadID].x * scale;
        odata[threadID].y = idata[threadID].y * scale;
    }
}

int main()
{
    const int elemsPerTransform = NX * NY;
    const int totalElems = elemsPerTransform * BATCH;
    const size_t totalBytes = sizeof(cufftComplex) * (size_t)totalElems;

    float2 *Host_data = NULL;
    cufftHandle plan;
    cufftComplex *Device_data = NULL;

    // cuFFT expects dimensions ordered from outermost (slowest-varying) to
    // innermost (contiguous). The host data below is indexed as i*NX + j with
    // i in [0, NY), i.e. NY rows of NX contiguous elements, so the order is
    // {NY, NX}.
    int n[NRANK] = { NY, NX };

    Host_data = (float2 *)malloc(totalBytes);
    if (Host_data == NULL)
    {
        fprintf(stderr, "Failed to allocate %zu bytes of host memory\n", totalBytes);
        return EXIT_FAILURE;
    }
    CUDA_CHECK(cudaMalloc((void **)&Device_data, totalBytes));

    // Fill every transform in the batch with a real-valued ramp 0, 1, 2, ...
    for (int b = 0; b < BATCH; b++)
        for (int i = 0; i < NY; i++)
            for (int j = 0; j < NX; j++)
            {
                const int idx = b * elemsPerTransform + i * NX + j;
                Host_data[idx].x = (float)(i * NX + j);
                Host_data[idx].y = 0.0f;
            }

    CUDA_CHECK(cudaMemcpy(Device_data, Host_data, totalBytes, cudaMemcpyHostToDevice));

    // inembed/onembed == NULL selects the default tightly-packed layout; the
    // stride/dist arguments are ignored by cuFFT in that case.
    CUFFT_CHECK(cufftPlanMany(&plan, NRANK, n, NULL, 1, 0, NULL, 1, 0, CUFFT_C2C, BATCH));

    // In-place forward transform followed by the in-place inverse transform.
    CUFFT_CHECK(cufftExecC2C(plan, Device_data, Device_data, CUFFT_FORWARD));
    CUFFT_CHECK(cufftExecC2C(plan, Device_data, Device_data, CUFFT_INVERSE));

    // cuFFT transforms are unnormalised: inverse(forward(x)) == (NX*NY) * x,
    // so scale by 1/(NX*NY) to recover the input. A ceil-div grid keeps the
    // launch valid when the element count exceeds the per-block thread limit.
    const int threadsPerBlock = 256;
    const int blocks = (totalElems + threadsPerBlock - 1) / threadsPerBlock;
    dim3 dimBlock(threadsPerBlock);
    dim3 dimGrid(blocks);
    cufftComplexScale<<<dimGrid, dimBlock>>>(Device_data, Device_data, totalElems, 1.0f / (NX * NY));
    CUDA_CHECK(cudaGetLastError());        // launch-configuration errors
    CUDA_CHECK(cudaDeviceSynchronize());   // asynchronous execution errors

    CUDA_CHECK(cudaMemcpy(Host_data, Device_data, totalBytes, cudaMemcpyDeviceToHost));

    CUFFT_CHECK(cufftDestroy(plan));
    CUDA_CHECK(cudaFree(Device_data));

    for (int idx = 0; idx < totalElems; idx++)
    {
        printf("%f %f\n", Host_data[idx].x, Host_data[idx].y);
    }

    free(Host_data);

#ifdef _WIN32
    // Keep the console window open when launched from an IDE on Windows.
    system("pause");
#endif
    return 0;
}