setting up a CUDA 2D “unsigned char” texture for linear interpolation

后端 未结 1 377
后悔当初
后悔当初 2021-02-04 21:24

I have a linear array of unsigned chars representing a 2D array. I would like to place it into a CUDA 2D texture and perform (floating point) linear interpolation on it, i.e.,

1条回答
  •  梦谈多话
    2021-02-04 22:08

    Each row of a texture must be properly aligned. This cannot be guaranteed in general if you bind the texture to a plain array (as opposed to a CUDA array). To bind plain memory to a 2D texture, you would want to allocate the memory with cudaMallocPitch(). This sets the row pitch such that it is suitable for binding to a texture. Note that it is not good practice to pass 0 as the first argument to a texture binding API call. This argument is for CUDA to return an offset to the app. If the offset is non-zero you will need to add it to the texture coordinate during texture access.

    Here is a quick example that shows how to read interpolated values from a texture whose elements are unsigned char.

    #include 
    #include 
    
    // Macro to catch CUDA errors in CUDA runtime calls
    #define CUDA_SAFE_CALL(call)                                          \
    do {                                                                  \
        cudaError_t err = call;                                           \
        if (cudaSuccess != err) {                                         \
            fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
                     __FILE__, __LINE__, cudaGetErrorString(err) );       \
            exit(EXIT_FAILURE);                                           \
        }                                                                 \
    } while (0)
    // Macro to catch CUDA errors in kernel launches
    #define CHECK_LAUNCH_ERROR()                                          \
    do {                                                                  \
        /* Check synchronous errors, i.e. pre-launch */                   \
        cudaError_t err = cudaGetLastError();                             \
        if (cudaSuccess != err) {                                         \
            fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
                     __FILE__, __LINE__, cudaGetErrorString(err) );       \
            exit(EXIT_FAILURE);                                           \
        }                                                                 \
        /* Check asynchronous errors, i.e. kernel failed (ULF) */         \
        err = cudaThreadSynchronize();                                    \
        if (cudaSuccess != err) {                                         \
            fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
                     __FILE__, __LINE__, cudaGetErrorString( err) );      \
            exit(EXIT_FAILURE);                                           \
        }                                                                 \
    } while (0)
    
    texture tex;
    
    __global__ void kernel (int m, int n, float shift_x, float shift_y) 
    {
        float val;
        for (int row = 0; row < m; row++) {
            for (int col = 0; col < n; col++) {
                val = tex2D (tex, col+0.5f+shift_x, row+0.5f+shift_y);
                printf ("%.2f  ", val);
            }
            printf ("\n");
        }
    }
    
    int main (void)
    {
        int m = 4; // height = #rows
        int n = 3; // width  = #columns
        size_t pitch, tex_ofs;
        unsigned char arr[4][3]= {{11,12,13},{21,22,23},{31,32,33},{251,252,253}};
        unsigned char *arr_d = 0;
    
        CUDA_SAFE_CALL(cudaMallocPitch((void**)&arr_d,&pitch,n*sizeof(*arr_d),m));
        CUDA_SAFE_CALL(cudaMemcpy2D(arr_d, pitch, arr, n*sizeof(arr[0][0]),
                                    n*sizeof(arr[0][0]),m,cudaMemcpyHostToDevice));
        tex.normalized = false;
        tex.filterMode = cudaFilterModeLinear;
        CUDA_SAFE_CALL (cudaBindTexture2D (&tex_ofs, &tex, arr_d, &tex.channelDesc,
                                           n, m, pitch));
        if (tex_ofs !=0) {
            printf ("tex_ofs = %zu\n", tex_ofs);
            return EXIT_FAILURE;
        }
        printf ("reading array straight\n");
        kernel<<<1,1>>>(m, n, 0.0f, 0.0f);
        CHECK_LAUNCH_ERROR();
        CUDA_SAFE_CALL (cudaDeviceSynchronize());
        printf ("reading array shifted in x-direction\n");
        kernel<<<1,1>>>(m, n, 0.5f, 0.0f);
        CHECK_LAUNCH_ERROR();
        CUDA_SAFE_CALL (cudaDeviceSynchronize());
        printf ("reading array shifted in y-direction\n");
        kernel<<<1,1>>>(m, n, 0.0f, 0.5f);
        CUDA_SAFE_CALL (cudaDeviceSynchronize());
        CUDA_SAFE_CALL (cudaFree (arr_d));
        return EXIT_SUCCESS;
    }
    

    The output of this program is as follows:

    reading array straight
    0.04  0.05  0.05
    0.08  0.09  0.09
    0.12  0.13  0.13
    0.98  0.99  0.99
    reading array shifted in x-direction
    0.05  0.05  0.05
    0.08  0.09  0.09
    0.12  0.13  0.13
    0.99  0.99  0.99
    reading array shifted in y-direction
    0.06  0.07  0.07
    0.10  0.11  0.11
    0.55  0.56  0.56
    0.98  0.99  0.99
    

    0 讨论(0)
提交回复
热议问题