The different addressing modes of CUDA textures

后端 未结 2 1572
耶瑟儿~
耶瑟儿~ 2021-02-04 11:14

I am using a CUDA texture in border addressing mode (cudaAddressModeBorder). I am reading texture coordinates using tex2D(). When the text

相关标签:
2条回答
  • 2021-02-04 11:38

    As mentioned by sgarizvi, CUDA supports only four, non-customizable address modes, namely, clamp, border, wrap and mirror, which are described in Section 3.2.11.1. of the CUDA programming guide.

    The former two work in both unnormalized and normalized coordinates, while the latter two in normalized coordinates only.

    To describe the first two, let us consider the unnormalized coordinates case and consider 1D signals, for the sake of simplicity. In this case, the input sequence is c[k], with k=0,...,M-1.

    cudaAddressModeClamp

    The signal c[k] is continued outside k=0,...,M-1 so that c[k] = c[0] for k < 0, and c[k] = c[M-1] for k >= M.

    cudaAddressModeBorder

    The signal c[k] is continued outside k=0,...,M-1 so that c[k] = 0 for k < 0 and for k >= M.

    Now, to describe the last two address modes, we are forced to consider normalized coordinates, so that the 1D input signal samples are assumed to be c[k / M], with k=0,...,M-1.

    cudaAddressModeWrap

    The signal c[k / M] is continued outside k=0,...,M-1 so that it is periodic with period equal to M. In other words, c[(k + p * M) / M] = c[k / M] for any (positive, negative or vanishing) integer p.

    cudaAddressModeMirror

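    The signal c[k / M] is continued outside k=0,...,M-1 by reflecting it about its two ends, so that it is periodic with period equal to 2 * M. In other words, c[l / M] = c[k / M] for any l and k such that (l + k + 1)mod(2 * M) = 0; for example, with M = 4, c[4 / M] = c[3 / M] and c[-1 / M] = c[0 / M], in agreement with the outputs reported below.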

    The following code illustrates all four available address modes:

    #include <stdio.h>
    #include <stdlib.h>
    
    // --- File-scope texture references, one per address mode under test.
    //     Each one is a 1D texture of floats whose fetches return the stored
    //     element unchanged (cudaReadModeElementType). The address mode and the
    //     normalized-coordinates flag are assigned at run time by main().
    // NOTE(review): texture references are the legacy texture API; confirm that
    //     the toolkit used to build this file still provides them.
    texture<float, cudaTextureType1D, cudaReadModeElementType> texture_clamp;   // cudaAddressModeClamp
    texture<float, cudaTextureType1D, cudaReadModeElementType> texture_border;  // cudaAddressModeBorder
    texture<float, cudaTextureType1D, cudaReadModeElementType> texture_wrap;    // cudaAddressModeWrap
    texture<float, cudaTextureType1D, cudaReadModeElementType> texture_mirror;  // cudaAddressModeMirror
    
    /********************/
    /* CUDA ERROR CHECK */
    /********************/
    // gpuErrchk(call): evaluates a CUDA runtime call and hands its status,
    // together with the source location of the call site, to gpuAssert().
    // The do { } while (0) wrapper makes the macro behave as a single statement,
    // so it can be used safely inside un-braced if/else bodies.
    #define gpuErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)

    // Reports a failed CUDA runtime call on stderr.
    //   code  - status returned by the CUDA runtime call
    //   file  - source file of the call site (__FILE__ is a string literal,
    //           hence the pointer to const)
    //   line  - source line of the call site
    //   abort - when true (the default) the process exits with `code` as its
    //           exit status; when false the error is only reported
    inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
    {
        if (code == cudaSuccess) return;

        fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
        if (abort) exit(code);
    }
    
    /******************************/
    /* CUDA ADDRESS MODE CLAMPING */
    /******************************/
    // Each thread fetches texture_clamp at two unnormalized coordinates,
    // -threadIdx.x and M + threadIdx.x, and prints the index and the fetched value.
    //   M - number of samples stored in the texture.
    // Only threadIdx.x is used, so the kernel is meant for a single 1D block;
    // main() launches it with 2 * M threads.
    __global__ void Test_texture_clamping(const int M) {

        // threadIdx.x is unsigned: convert it once so that the indices handed to
        // the %i conversions are genuine signed ints rather than wrapped unsigned values.
        const int tid   = (int)threadIdx.x;
        const int left  = -tid;       // index at or before the first sample
        const int right = M + tid;    // index at or past the last sample

        printf("Texture clamping - i = %i; value = %f\n", left,  tex1D(texture_clamp, -(float)tid));
        printf("Texture clamping - i = %i; value = %f\n", right, tex1D(texture_clamp, (float)right));

    }
    
    /****************************/
    /* CUDA ADDRESS MODE BORDER */
    /****************************/
    // Each thread fetches texture_border at two unnormalized coordinates,
    // -threadIdx.x and M + threadIdx.x, and prints the index and the fetched value.
    //   M - number of samples stored in the texture.
    // Only threadIdx.x is used, so the kernel is meant for a single 1D block;
    // main() launches it with 2 * M threads.
    __global__ void Test_texture_border(const int M) {

        // threadIdx.x is unsigned: convert it once so that the indices handed to
        // the %i conversions are genuine signed ints rather than wrapped unsigned values.
        const int tid   = (int)threadIdx.x;
        const int left  = -tid;       // index at or before the first sample
        const int right = M + tid;    // index at or past the last sample

        printf("Texture border - i = %i; value = %f\n", left,  tex1D(texture_border, -(float)tid));
        printf("Texture border - i = %i; value = %f\n", right, tex1D(texture_border, (float)right));

    }
    
    /**************************/
    /* CUDA ADDRESS MODE WRAP */
    /**************************/
    // Each thread fetches texture_wrap at two normalized coordinates,
    // -threadIdx.x / M and (M + threadIdx.x) / M, and prints the corresponding
    // sample index and the fetched value.
    //   M - number of samples stored in the texture; used to normalize the coordinates.
    // Only threadIdx.x is used, so the kernel is meant for a single 1D block;
    // main() launches it with 2 * M threads.
    __global__ void Test_texture_wrap(const int M) {

        // threadIdx.x is unsigned: convert it once so that the indices handed to
        // the %i conversions are genuine signed ints rather than wrapped unsigned values.
        const int   tid   = (int)threadIdx.x;
        const int   left  = -tid;       // index at or before the first sample
        const int   right = M + tid;    // index at or past the last sample
        const float size  = (float)M;

        printf("Texture wrap - i = %i; value = %f\n", left,  tex1D(texture_wrap, -(float)tid / size));
        printf("Texture wrap - i = %i; value = %f\n", right, tex1D(texture_wrap, (float)right / size));

    }
    
    /****************************/
    /* CUDA ADDRESS MODE MIRROR */
    /****************************/
    // Each thread fetches texture_mirror at two normalized coordinates,
    // -threadIdx.x / M and (M + threadIdx.x) / M, and prints the corresponding
    // sample index and the fetched value.
    //   M - number of samples stored in the texture; used to normalize the coordinates.
    // Only threadIdx.x is used, so the kernel is meant for a single 1D block;
    // main() launches it with 2 * M threads.
    __global__ void Test_texture_mirror(const int M) {

        // threadIdx.x is unsigned: convert it once so that the indices handed to
        // the %i conversions are genuine signed ints rather than wrapped unsigned values.
        const int   tid   = (int)threadIdx.x;
        const int   left  = -tid;       // index at or before the first sample
        const int   right = M + tid;    // index at or past the last sample
        const float size  = (float)M;

        printf("Texture mirror - i = %i; value = %f\n", left,  tex1D(texture_mirror, -(float)tid / size));
        printf("Texture mirror - i = %i; value = %f\n", right, tex1D(texture_mirror, (float)right / size));

    }
    
    /********/
    /* MAIN */
    /********/
    // Type shared by the four file-scope texture references used in this demo.
    typedef texture<float, 1, cudaReadModeElementType> FloatTexture1D;

    // Allocates a CUDA array of width M and height 1, uploads the M floats of
    // h_data into it, configures `tex` and binds it to the array.
    // Returns the array so that the caller can release it with releaseTexture().
    static cudaArray* setupTexture(FloatTexture1D &tex, const float *h_data, const int M,
                                   const bool normalized, const cudaTextureAddressMode mode)
    {
        cudaArray *d_array = NULL;
        gpuErrchk(cudaMallocArray(&d_array, &tex.channelDesc, M, 1));
        gpuErrchk(cudaMemcpyToArray(d_array, 0, 0, h_data, M * sizeof(float), cudaMemcpyHostToDevice));

        // --- Configure the reference first and bind afterwards, which is the
        //     order used by the CUDA programming guide for texture references.
        tex.normalized     = normalized;
        tex.addressMode[0] = mode;
        gpuErrchk(cudaBindTextureToArray(tex, d_array));

        return d_array;
    }

    // Unbinds `tex` and frees the CUDA array it was bound to.
    static void releaseTexture(FloatTexture1D &tex, cudaArray *d_array)
    {
        gpuErrchk(cudaUnbindTexture(tex));
        gpuErrchk(cudaFreeArray(d_array));
    }

    // Checks the preceding kernel launch for errors and waits for it to finish.
    // The synchronization also flushes the device-side printf buffer, so each
    // kernel's output appears before the host prints the next separator and the
    // output of the last kernel is not lost at program exit.
    static void finishKernel()
    {
        gpuErrchk(cudaGetLastError());
        gpuErrchk(cudaDeviceSynchronize());
    }

    // Fills a 4-sample signal {0, 1, 2, 3}, binds it to one texture per address
    // mode and launches the matching test kernel, which prints the values fetched
    // outside the valid index range. Returns 0 on success.
    int main(){

        const int M = 4;

        // --- Host side memory allocation and initialization
        float *h_data = (float*)malloc(M * sizeof(float));
        if (h_data == NULL) {
            fprintf(stderr, "Host allocation of %d floats failed\n", M);
            return EXIT_FAILURE;
        }

        for (int i=0; i<M; i++) h_data[i] = (float)i;

        // --- One block of 2 * M threads: every kernel probes indices
        //     -(2 * M - 1),...,0 and M,...,3 * M - 1
        dim3 dimBlock(2 * M, 1); dim3 dimGrid(1, 1);

        // --- Texture clamping (unnormalized coordinates)
        cudaArray* d_data_clamping = setupTexture(texture_clamp, h_data, M, false, cudaAddressModeClamp);
        Test_texture_clamping<<<dimGrid,dimBlock>>>(M);
        finishKernel();

        printf("\n\n\n");

        // --- Texture border (unnormalized coordinates)
        cudaArray* d_data_border = setupTexture(texture_border, h_data, M, false, cudaAddressModeBorder);
        Test_texture_border<<<dimGrid,dimBlock>>>(M);
        finishKernel();

        printf("\n\n\n");

        // --- Texture wrap (normalized coordinates)
        cudaArray* d_data_wrap = setupTexture(texture_wrap, h_data, M, true, cudaAddressModeWrap);
        Test_texture_wrap<<<dimGrid,dimBlock>>>(M);
        finishKernel();

        printf("\n\n\n");

        // --- Texture mirror (normalized coordinates)
        cudaArray* d_data_mirror = setupTexture(texture_mirror, h_data, M, true, cudaAddressModeMirror);
        Test_texture_mirror<<<dimGrid,dimBlock>>>(M);
        finishKernel();

        printf("\n\n\n");

        // --- Release device and host resources
        releaseTexture(texture_clamp,  d_data_clamping);
        releaseTexture(texture_border, d_data_border);
        releaseTexture(texture_wrap,   d_data_wrap);
        releaseTexture(texture_mirror, d_data_mirror);
        free(h_data);

        return 0;
    }
    

    These are the outputs:

    index                  -7  -6  -5  -4  -3  -2  -1  0  1  2  3  4  5  6  7  8  9  10  11
    clamp                   0   0   0   0   0   0   0  0  1  2  3  3  3  3  3  3  3   3   3
    border                  0   0   0   0   0   0   0  0  1  2  3  0  0  0  0  0  0   0   0
    wrap                    1   2   3   0   1   2   3  0  1  2  3  0  1  2  3  0  1   2   3
    mirror                  1   2   3   3   2   1   0  0  1  2  3  3  2  1  0  0  1   2   3
    
    0 讨论(0)
  • 2021-02-04 11:42

    As of now (CUDA 5.5), the CUDA texture fetch behavior is not customizable. Only 1 of the 4 automatic built-in modes (i.e. Border, Clamp, Wrap and Mirror) can be utilized for out of range texture fetch.

    0 讨论(0)
提交回复
热议问题