问题
I tried to compute a simple FFT of a 2D array in both MATLAB and CUDA (cuFFT) and compare the results.
MATLAB: array of 9 numbers 1-9
I = [1 2 3
4 5 6
7 8 9];
and use this code:
fft(I)
gives the results:
12.0000 + 0.0000i 15.0000 + 0.0000i 18.0000 + 0.0000i
-4.5000 + 2.5981i -4.5000 + 2.5981i -4.5000 + 2.5981i
-4.5000 - 2.5981i -4.5000 - 2.5981i -4.5000 - 2.5981i
And CUDA code:
// Runs a double-precision real-to-complex 2D FFT (cuFFT D2Z) on a 3x3 matrix
// holding the values 1..9 in row-major order, and prints the elapsed time for
// plan creation, the host<->device copies and the transform.
//
// Returns 0 on success, 1 if cuFFT plan creation or execution fails.
// NOTE(review): cudaCheckErrors is defined elsewhere in the project; it is
// presumably a macro that inspects the last CUDA runtime error - confirm
// whether it aborts or merely logs.
int FFT_Test_Function() {
    // Compile-time constants so the host arrays below are standard C++ arrays
    // rather than variable-length arrays.
    const int width = 3;
    const int height = 3;
    const int n = width * height;
    // A 2D real-to-complex transform only stores the non-redundant half of the
    // spectrum: each of the `width` rows holds (height / 2 + 1) complex values.
    const int out_cols = height / 2 + 1;
    const int n_out = width * out_cols;

    double in[width][height];
    // Host result buffer uses the same element type as the device buffer so
    // the byte counts of the copy-back always match.
    cufftDoubleComplex out[width][out_cols];

    for (int i = 0; i < width; i++)
    {
        for (int j = 0; j < height; j++)
        {
            // Row stride is `height` (the innermost dimension).
            in[i][j] = (i * height) + j + 1;
        }
    }

    // Allocate the buffers
    cufftDoubleReal *d_in = 0;
    cufftDoubleComplex *d_out = 0;
    const size_t in_mem_size = sizeof(cufftDoubleReal) * n;
    const size_t out_mem_size = sizeof(cufftDoubleComplex) * n_out;
    cudaMalloc((void **)&d_in, in_mem_size);
    cudaMalloc((void **)&d_out, out_mem_size);
    cudaCheckErrors("cuda malloc fail");

    // Save time stamp
    milliseconds timeStart = getCurrentTimeStamp();

    cufftHandle plan;
    cufftResult res = cufftPlan2d(&plan, width, height, CUFFT_D2Z);
    if (res != CUFFT_SUCCESS)
    {
        cout << "cufft plan error: " << res << endl;
        // No plan was created; release only the device buffers.
        cudaFree(d_in);
        cudaFree(d_out);
        return 1;
    }

    // Both the host array and the device buffer are contiguous, so a single
    // transfer replaces the per-row copies.
    cudaMemcpy(d_in, in, in_mem_size, cudaMemcpyHostToDevice);
    cudaCheckErrors("cuda memcpy H2D fail");

    res = cufftExecD2Z(plan, d_in, d_out);
    if (res != CUFFT_SUCCESS)
    {
        cout << "cufft exec error: " << res << endl;
        cufftDestroy(plan);
        cudaFree(d_in);
        cudaFree(d_out);
        return 1;
    }

    // Blocking copy on the default stream: it also waits for the transform.
    cudaMemcpy(out, d_out, out_mem_size, cudaMemcpyDeviceToHost);
    cudaCheckErrors("cuda memcpy D2H fail");

    milliseconds timeEnd = getCurrentTimeStamp();
    milliseconds totalTime = timeEnd - timeStart;
    std::cout << "Total time: " << totalTime.count() << std::endl;

    // Release the plan and device memory (kept outside the timed region).
    cufftDestroy(plan);
    cudaFree(d_in);
    cudaFree(d_out);
    return 0;
}
With this CUDA code I got the result:
You can see that CUDA gives different results.
What am I missing?
Thank you very much for your attention!
回答1:
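The cuFFT result looks correct, but your MATLAB code is wrong: applied to a matrix, fft computes a separate 1D FFT of each column, not a 2D FFT. To match cufftPlan2d it should be: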
octave:1> I = [ 1 2 3; 4 5 6; 7 8 9 ]
I =
1 2 3
4 5 6
7 8 9
octave:2> fft2(I)
ans =
45.00000 + 0.00000i -4.50000 + 2.59808i -4.50000 - 2.59808i
-13.50000 + 7.79423i 0.00000 + 0.00000i 0.00000 + 0.00000i
-13.50000 - 7.79423i 0.00000 - 0.00000i 0.00000 - 0.00000i
Note the use of fft2.
来源:https://stackoverflow.com/questions/44436155/cuda-fft-2d-different-results-from-matlab-fft-on-2d