I have a linear array of unsigned chars representing a 2D array. I would like to place it into a CUDA 2D texture and perform (floating point) linear interpolation on it, i.e.,
Each row of a texture must be properly aligned. This cannot be guaranteed in general if you bind the texture to a plain array (as opposed to a CUDA array). To bind plain memory to a 2D texture, you would want to allocate the memory with cudaMallocPitch()
. This sets the row pitch such that it is suitable for binding to a texture. Note that it is not good practice to pass 0 as the first argument to a texture binding API call. This argument is for CUDA to return an offset to the app. If the offset is non-zero you will need to add it to the texture coordinate during texture access.
Here is a quick example that shows how to read interpolated values from a texture whose elements are unsigned char
.
#include
#include
// Macro to catch CUDA errors in CUDA runtime calls
#define CUDA_SAFE_CALL(call) \
do { \
cudaError_t err = call; \
if (cudaSuccess != err) { \
fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
__FILE__, __LINE__, cudaGetErrorString(err) ); \
exit(EXIT_FAILURE); \
} \
} while (0)
// Macro to catch CUDA errors in kernel launches
#define CHECK_LAUNCH_ERROR() \
do { \
/* Check synchronous errors, i.e. pre-launch */ \
cudaError_t err = cudaGetLastError(); \
if (cudaSuccess != err) { \
fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
__FILE__, __LINE__, cudaGetErrorString(err) ); \
exit(EXIT_FAILURE); \
} \
/* Check asynchronous errors, i.e. kernel failed (ULF) */ \
err = cudaThreadSynchronize(); \
if (cudaSuccess != err) { \
fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
__FILE__, __LINE__, cudaGetErrorString( err) ); \
exit(EXIT_FAILURE); \
} \
} while (0)
texture tex;
__global__ void kernel (int m, int n, float shift_x, float shift_y)
{
float val;
for (int row = 0; row < m; row++) {
for (int col = 0; col < n; col++) {
val = tex2D (tex, col+0.5f+shift_x, row+0.5f+shift_y);
printf ("%.2f ", val);
}
printf ("\n");
}
}
int main (void)
{
int m = 4; // height = #rows
int n = 3; // width = #columns
size_t pitch, tex_ofs;
unsigned char arr[4][3]= {{11,12,13},{21,22,23},{31,32,33},{251,252,253}};
unsigned char *arr_d = 0;
CUDA_SAFE_CALL(cudaMallocPitch((void**)&arr_d,&pitch,n*sizeof(*arr_d),m));
CUDA_SAFE_CALL(cudaMemcpy2D(arr_d, pitch, arr, n*sizeof(arr[0][0]),
n*sizeof(arr[0][0]),m,cudaMemcpyHostToDevice));
tex.normalized = false;
tex.filterMode = cudaFilterModeLinear;
CUDA_SAFE_CALL (cudaBindTexture2D (&tex_ofs, &tex, arr_d, &tex.channelDesc,
n, m, pitch));
if (tex_ofs !=0) {
printf ("tex_ofs = %zu\n", tex_ofs);
return EXIT_FAILURE;
}
printf ("reading array straight\n");
kernel<<<1,1>>>(m, n, 0.0f, 0.0f);
CHECK_LAUNCH_ERROR();
CUDA_SAFE_CALL (cudaDeviceSynchronize());
printf ("reading array shifted in x-direction\n");
kernel<<<1,1>>>(m, n, 0.5f, 0.0f);
CHECK_LAUNCH_ERROR();
CUDA_SAFE_CALL (cudaDeviceSynchronize());
printf ("reading array shifted in y-direction\n");
kernel<<<1,1>>>(m, n, 0.0f, 0.5f);
CUDA_SAFE_CALL (cudaDeviceSynchronize());
CUDA_SAFE_CALL (cudaFree (arr_d));
return EXIT_SUCCESS;
}
The output of this program is as follows:
reading array straight
0.04 0.05 0.05
0.08 0.09 0.09
0.12 0.13 0.13
0.98 0.99 0.99
reading array shifted in x-direction
0.05 0.05 0.05
0.08 0.09 0.09
0.12 0.13 0.13
0.99 0.99 0.99
reading array shifted in y-direction
0.06 0.07 0.07
0.10 0.11 0.11
0.55 0.56 0.56
0.98 0.99 0.99