setting up a CUDA 2D “unsigned char” texture for linear interpolation

后端未结

关注

 1  377

后悔当初 2021-02-04 21:24

I have a linear array of unsigned chars representing a 2D array. I would like to place it into a CUDA 2D texture and perform (floating point) linear interpolation on it, i.e.,

1条回答

梦谈多话 (楼主)

2021-02-04 22:08

Each row of a texture must be properly aligned. This cannot be guaranteed in general if you bind the texture to a plain array (as opposed to a CUDA array). To bind plain memory to a 2D texture, you would want to allocate the memory with cudaMallocPitch(). This sets the row pitch such that it is suitable for binding to a texture. Note that it is not good practice to pass 0 as the first argument to a texture binding API call. This argument is for CUDA to return an offset to the app. If the offset is non-zero you will need to add it to the texture coordinate during texture access.

Here is a quick example that shows how to read interpolated values from a texture whose elements are unsigned char.

#include 
#include 

// Macro to catch CUDA errors in CUDA runtime calls
#define CUDA_SAFE_CALL(call)                                          \
do {                                                                  \
    cudaError_t err = call;                                           \
    if (cudaSuccess != err) {                                         \
        fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
                 __FILE__, __LINE__, cudaGetErrorString(err) );       \
        exit(EXIT_FAILURE);                                           \
    }                                                                 \
} while (0)
// Macro to catch CUDA errors in kernel launches
#define CHECK_LAUNCH_ERROR()                                          \
do {                                                                  \
    /* Check synchronous errors, i.e. pre-launch */                   \
    cudaError_t err = cudaGetLastError();                             \
    if (cudaSuccess != err) {                                         \
        fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
                 __FILE__, __LINE__, cudaGetErrorString(err) );       \
        exit(EXIT_FAILURE);                                           \
    }                                                                 \
    /* Check asynchronous errors, i.e. kernel failed (ULF) */         \
    err = cudaThreadSynchronize();                                    \
    if (cudaSuccess != err) {                                         \
        fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
                 __FILE__, __LINE__, cudaGetErrorString( err) );      \
        exit(EXIT_FAILURE);                                           \
    }                                                                 \
} while (0)

texture tex;

__global__ void kernel (int m, int n, float shift_x, float shift_y) 
{
    float val;
    for (int row = 0; row < m; row++) {
        for (int col = 0; col < n; col++) {
            val = tex2D (tex, col+0.5f+shift_x, row+0.5f+shift_y);
            printf ("%.2f  ", val);
        }
        printf ("\n");
    }
}

int main (void)
{
    int m = 4; // height = #rows
    int n = 3; // width  = #columns
    size_t pitch, tex_ofs;
    unsigned char arr[4][3]= {{11,12,13},{21,22,23},{31,32,33},{251,252,253}};
    unsigned char *arr_d = 0;

    CUDA_SAFE_CALL(cudaMallocPitch((void**)&arr_d,&pitch,n*sizeof(*arr_d),m));
    CUDA_SAFE_CALL(cudaMemcpy2D(arr_d, pitch, arr, n*sizeof(arr[0][0]),
                                n*sizeof(arr[0][0]),m,cudaMemcpyHostToDevice));
    tex.normalized = false;
    tex.filterMode = cudaFilterModeLinear;
    CUDA_SAFE_CALL (cudaBindTexture2D (&tex_ofs, &tex, arr_d, &tex.channelDesc,
                                       n, m, pitch));
    if (tex_ofs !=0) {
        printf ("tex_ofs = %zu\n", tex_ofs);
        return EXIT_FAILURE;
    }
    printf ("reading array straight\n");
    kernel<<<1,1>>>(m, n, 0.0f, 0.0f);
    CHECK_LAUNCH_ERROR();
    CUDA_SAFE_CALL (cudaDeviceSynchronize());
    printf ("reading array shifted in x-direction\n");
    kernel<<<1,1>>>(m, n, 0.5f, 0.0f);
    CHECK_LAUNCH_ERROR();
    CUDA_SAFE_CALL (cudaDeviceSynchronize());
    printf ("reading array shifted in y-direction\n");
    kernel<<<1,1>>>(m, n, 0.0f, 0.5f);
    CUDA_SAFE_CALL (cudaDeviceSynchronize());
    CUDA_SAFE_CALL (cudaFree (arr_d));
    return EXIT_SUCCESS;
}

The output of this program is as follows:

reading array straight
0.04  0.05  0.05
0.08  0.09  0.09
0.12  0.13  0.13
0.98  0.99  0.99
reading array shifted in x-direction
0.05  0.05  0.05
0.08  0.09  0.09
0.12  0.13  0.13
0.99  0.99  0.99
reading array shifted in y-direction
0.06  0.07  0.07
0.10  0.11  0.11
0.55  0.56  0.56
0.98  0.99  0.99

0 讨论(0)