问题
I'm trying to calculate the fft of an image using CUFFT. It seems like CUFFT only offers fft of plain device pointers allocated with cudaMalloc
.
My input images are allocated using cudaMallocPitch
but there is no option for handling pitch of the image pointer.
Currently, I have to remove the alignment of rows, then execute the fft, and copy back the results to the pitched pointer. My current code is as follows:
void fft_device(float* src, cufftComplex* dst, int width, int height, int srcPitch, int dstPitch)
{
//src and dst are device pointers allocated with cudaMallocPitch
//Convert them to plain pointers. No padding of rows.
float *plainSrc;
cufftComplex *plainDst;
cudaMalloc<float>(&plainSrc,width * height * sizeof(float));
cudaMalloc<cufftComplex>(&plainDst,width * height * sizeof(cufftComplex));
cudaMemcpy2D(plainSrc,width * sizeof(float),src,srcPitch,width * sizeof(float),height,cudaMemcpyDeviceToDevice);
cufftHandle handle;
cufftPlan2d(&handle,width,height,CUFFT_R2C);
cufftSetCompatibilityMode(handle,CUFFT_COMPATIBILITY_NATIVE);
cufftExecR2C(handle,plainSrc,plainDst);
cufftDestroy(handle);
cudaMemcpy2D(dst,dstPitch,plainDst,width * sizeof(cufftComplex),width * sizeof(cufftComplex),height,cudaMemcpyDeviceToDevice);
cudaFree(plainSrc);
cudaFree(plainDst);
}
It gives correct result, but I don't want to do 2 extra memory allocations and copies inside the function. I want to do something like this:
void fft_device(float* src, cufftComplex* dst, int width, int height, int srcPitch, int dstPitch)
{
//src and dst are device pointers allocated with cudaMallocPitch
//Don't know how to handle pitch here???
cufftHandle handle;
cufftPlan2d(&handle,width,height,CUFFT_R2C);
cufftSetCompatibilityMode(handle,CUFFT_COMPATIBILITY_NATIVE);
cufftExecR2C(handle,src,dst);
cufftDestroy(handle);
}
Question:
How to calculate the fft of pitched pointer directly using CUFFT?
回答1:
I think you may be interested in cufftPlanMany
which would let you do 1D, 2D, and 3D ffts with pitches. The key here is inembed and onembed parameters.
You can look up CUDA_CUFFT_Users_Guide.pdf (Pages 23-24) for more information. But for your example, you'd be doing something like the follows.
void fft_device(float* src, cufftComplex* dst,
int width, int height,
int srcPitch, int dstPitch)
{
cufftHandle handle;
int rank = 2; // 2D fft
int n[] = {width, height}; // Size of the Fourier transform
int istride = 1, ostride = 1; // Stride lengths
int idist = 1, odist = 1; // Distance between batches
int inembed[] = {srcPitch, height}; // Input size with pitch
int onembed[] = {dstPitch, height}; // Output size with pitch
int batch = 1;
cufftPlanMany(&handle, rank, n,
inembed, istride, idist,
onembed, ostride, odist, CUFFT_R2C, batch);
cufftSetCompatibilityMode(handle,CUFFT_COMPATIBILITY_NATIVE);
cufftExecR2C(handle,src,dst);
cufftDestroy(handle);
}
P.S. I did not add return checks for the sake of example here. Always check for return values in your code.
来源:https://stackoverflow.com/questions/14026900/cufft-how-to-calculate-fft-of-pitched-pointer