cuda中二维FFT使用-cufftExecC2C

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <iostream>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <cufft.h>
#include <opencv.hpp>

// Transform extents: main() passes NX as n[0] and NY as n[1] of the cuFFT plan,
// and the host/device buffers hold NX*NY complex elements.
#define NX 3 
#define NY 5
// Value handed to cufftPlanMany as its batch argument.
#define BATCH 1
// Rank passed to cufftPlanMany; also the length of the n[] extent array.
#define NRANK 2

using namespace cv;
using std::cout;
using std::endl;

// Multiplies both components of each complex element by `scale`.
//
// One thread handles one element, using a flat 1D index built from
// blockIdx.x / blockDim.x / threadIdx.x. Threads whose index is not below
// `size` do nothing.
//
//   idata  source elements
//   odata  destination elements (main() passes the same pointer as idata)
//   size   number of elements to process
//   scale  factor applied to the .x and .y components
static __global__ void cufftComplexScale(cufftComplex *idata, cufftComplex *odata, const int size, float scale)
{
    const int element = threadIdx.x + blockDim.x * blockIdx.x;

    // Guard: surplus threads past the end of the data exit immediately.
    if (element >= size)
        return;

    const cufftComplex *src = idata + element;
    cufftComplex *dst = odata + element;

    dst->x = src->x * scale;
    dst->y = src->y * scale;
}
// Terminates the program with a diagnostic when a CUDA runtime call fails.
static void abortOnCudaError(cudaError_t status, const char *what)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Terminates the program with a diagnostic when a cuFFT call fails.
static void abortOnCufftError(cufftResult status, const char *what)
{
    if (status != CUFFT_SUCCESS)
    {
        fprintf(stderr, "cuFFT error in %s: code %d\n", what, (int)status);
        exit(EXIT_FAILURE);
    }
}

// Round-trip demo for a 2D complex-to-complex FFT:
//   1. fill an NX x NY complex array with the values 0..NX*NY-1 (imaginary part 0),
//   2. run a forward and then an inverse in-place transform on the device,
//   3. scale by 1/(NX*NY) and print the recovered values, one "re im" pair per line.
// Returns 0 on success; exits with EXIT_FAILURE if any allocation, CUDA or
// cuFFT call fails.
int main()
{
    const int total = NX * NY;
    const size_t bytes = sizeof(cufftComplex) * total;

    float2 *Host_data = NULL;
    cufftComplex *Device_data = NULL;
    cufftHandle plan;
    int n[NRANK] = { NX, NY };

    Host_data = (float2*)malloc(bytes);
    if (Host_data == NULL)
    {
        fprintf(stderr, "Host allocation of %lu bytes failed\n", (unsigned long)bytes);
        return EXIT_FAILURE;
    }
    abortOnCudaError(cudaMalloc((void**)&Device_data, bytes), "cudaMalloc");

    // The plan is created with n = { NX, NY }, so index the buffer as NX rows
    // of NY contiguous elements to agree with it.
    for (int row = 0; row < NX; row++)
        for (int col = 0; col < NY; col++)
        {
            const int idx = row * NY + col;
            Host_data[idx].x = (float)idx;
            Host_data[idx].y = 0.0f;
        }

    abortOnCudaError(cudaMemcpy(Device_data, Host_data, bytes, cudaMemcpyHostToDevice),
        "cudaMemcpy (host to device)");

    abortOnCufftError(cufftPlanMany(&plan, NRANK, n,
        NULL, 1, 0,
        NULL, 1, 0,
        CUFFT_C2C, BATCH), "cufftPlanMany");

    // Forward then inverse, both in place. The result still has to be scaled
    // by 1/(NX*NY) to get the input back, which the kernel below does.
    abortOnCufftError(cufftExecC2C(plan, Device_data, Device_data, CUFFT_FORWARD),
        "cufftExecC2C (forward)");
    abortOnCufftError(cufftExecC2C(plan, Device_data, Device_data, CUFFT_INVERSE),
        "cufftExecC2C (inverse)");

    // Ceil-divide so the launch stays valid when NX*NY exceeds one block;
    // the kernel's bounds check discards the surplus threads.
    const int threadsPerBlock = 256;
    const int blocks = (total + threadsPerBlock - 1) / threadsPerBlock;
    cufftComplexScale<<<blocks, threadsPerBlock>>>(Device_data, Device_data, total, 1.0f / (NX*NY));
    abortOnCudaError(cudaGetLastError(), "cufftComplexScale launch");
    // Synchronize before the copy so an execution fault is attributed to the kernel.
    abortOnCudaError(cudaDeviceSynchronize(), "cufftComplexScale execution");

    abortOnCudaError(cudaMemcpy(Host_data, Device_data, bytes, cudaMemcpyDeviceToHost),
        "cudaMemcpy (device to host)");

    abortOnCufftError(cufftDestroy(plan), "cufftDestroy");
    abortOnCudaError(cudaFree(Device_data), "cudaFree");

    for (int row = 0; row < NX; row++)
        for (int col = 0; col < NY; col++)
        {
            const int idx = row * NY + col;
            printf("%f %f\n", Host_data[idx].x, Host_data[idx].y);
        }

    free(Host_data);

#ifdef _WIN32
    // Keep the console window open when launched from an IDE on Windows;
    // "pause" is not a command on other platforms.
    system("pause");
#endif
    return 0;
}

 

posted @ 2018-02-02 18:08  aote369  阅读(1503)  评论(0编辑  收藏  举报