The different addressing modes of CUDA textures

后端未结

关注

 2  1571

耶瑟儿～ 2021-02-04 11:14

I am using a CUDA texture in border addressing mode (cudaAddressModeBorder). I am reading texture coordinates using tex2D(). When the text

2条回答

慢半拍i (楼主)

2021-02-04 11:38

As mentioned by sgarizvi, CUDA supports only four, non-customizable address modes, namely, clamp, border, wrap and mirror, which are described in Section 3.2.11.1. of the CUDA programming guide.

The former two work in both unnormalized and normalized coordinates, while the latter two in normalized coordinates only.

To describe the first two, let us consider the unnormalized coordinates case and consider 1D signals, for the sake of simplicity. In this case, the input sequence is c[k], with k=0,...,M-1.

cudaAddressModeClamp

The signal c[k] is continued outside k=0,...,M-1 so that c[k] = c[0] for k < 0, and c[k] = c[M-1] for k >= M.

cudaAddressModeBorder

The signal c[k] is continued outside k=0,...,M-1 so that c[k] = 0 for k < 0and for k >= M.

Now, to describe the last two address modes, we are forced to consider normalized coordinates, so that the 1D input signal samples are assumed to be c[k / M], with k=0,...,M-1.

cudaAddressModeWrap

The signal c[k / M] is continued outside k=0,...,M-1 so that it is periodic with period equal to M. In other words, c[(k + p * M) / M] = c[k / M] for any (positive, negative or vanishing) integer p.

cudaAddressModeMirror

The signal c[k / M] is continued outside k=0,...,M-1 so that it is periodic with period equal to 2 * M - 2. In other words, c[l / M] = c[k / M] for any l and k such that (l + k)mod(2 * M - 2) = 0.

The following code illustrates all the four available address modes

#include 

texture texture_clamp;
texture texture_border;
texture texture_wrap;
texture texture_mirror;

/********************/
/* CUDA ERROR CHECK */
/********************/
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true)
{
    if (code != cudaSuccess) 
    {
        fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
        if (abort) exit(code);
    }
}

/******************************/
/* CUDA ADDRESS MODE CLAMPING */
/******************************/
__global__ void Test_texture_clamping(const int M) {

    printf("Texture clamping - i = %i; value = %f\n", -threadIdx.x, tex1D(texture_clamp, -(float)threadIdx.x));
    printf("Texture clamping - i = %i; value = %f\n", M + threadIdx.x, tex1D(texture_clamp, (float)(M + threadIdx.x)));

}

/****************************/
/* CUDA ADDRESS MODE BORDER */
/****************************/
__global__ void Test_texture_border(const int M) {

    printf("Texture border - i = %i; value = %f\n", -threadIdx.x, tex1D(texture_border, -(float)threadIdx.x));
    printf("Texture border - i = %i; value = %f\n", M + threadIdx.x, tex1D(texture_border, (float)(M + threadIdx.x)));

}

/**************************/
/* CUDA ADDRESS MODE WRAP */
/**************************/
__global__ void Test_texture_wrap(const int M) {

    printf("Texture wrap - i = %i; value = %f\n", -threadIdx.x, tex1D(texture_wrap, -(float)threadIdx.x/(float)M));
    printf("Texture wrap - i = %i; value = %f\n", M + threadIdx.x, tex1D(texture_wrap, (float)(M + threadIdx.x)/(float)M));

}

/****************************/
/* CUDA ADDRESS MODE MIRROR */
/****************************/
__global__ void Test_texture_mirror(const int M) {

    printf("Texture mirror - i = %i; value = %f\n", -threadIdx.x, tex1D(texture_mirror, -(float)threadIdx.x/(float)M));
    printf("Texture mirror - i = %i; value = %f\n", M + threadIdx.x, tex1D(texture_mirror, (float)(M + threadIdx.x)/(float)M));

}

/********/
/* MAIN */
/********/
void main(){

    const int M = 4;

    // --- Host side memory allocation and initialization
    float *h_data = (float*)malloc(M * sizeof(float));

    for (int i=0; i>>(M);

    printf("\n\n\n");

    // --- Texture border
    cudaArray* d_data_border = NULL; gpuErrchk(cudaMallocArray(&d_data_border, &texture_border.channelDesc, M, 1)); 
    gpuErrchk(cudaMemcpyToArray(d_data_border, 0, 0, h_data, M * sizeof(float), cudaMemcpyHostToDevice)); 
    cudaBindTextureToArray(texture_border, d_data_border); 
    texture_border.normalized = false; 
    texture_border.addressMode[0] = cudaAddressModeBorder;

    Test_texture_border<<>>(M);

    printf("\n\n\n");

    // --- Texture wrap
    cudaArray* d_data_wrap = NULL; gpuErrchk(cudaMallocArray(&d_data_wrap, &texture_wrap.channelDesc, M, 1)); 
    gpuErrchk(cudaMemcpyToArray(d_data_wrap, 0, 0, h_data, M * sizeof(float), cudaMemcpyHostToDevice)); 
    cudaBindTextureToArray(texture_wrap, d_data_wrap); 
    texture_wrap.normalized = true; 
    texture_wrap.addressMode[0] = cudaAddressModeWrap;

    Test_texture_wrap<<>>(M);

    printf("\n\n\n");

    // --- Texture mirror
    cudaArray* d_data_mirror = NULL; gpuErrchk(cudaMallocArray(&d_data_mirror, &texture_mirror.channelDesc, M, 1)); 
    gpuErrchk(cudaMemcpyToArray(d_data_mirror, 0, 0, h_data, M * sizeof(float), cudaMemcpyHostToDevice)); 
    cudaBindTextureToArray(texture_mirror, d_data_mirror); 
    texture_mirror.normalized = true ; 
    texture_mirror.addressMode[0] = cudaAddressModeMirror;

    Test_texture_mirror<<>>(M);

    printf("\n\n\n");
}

Those are the outputs

index                  -7  -6  -5  -4  -3  -2  -1  0  1  2  3  4  5  6  7  8  9  10  11
clamp                   0   0   0   0   0   0   0  0  1  2  3  3  3  3  3  3  3   3   3
border                  0   0   0   0   0   0   0  0  1  2  3  0  0  0  0  0  0   0   0
wrap                    1   2   3   0   1   2   3  0  1  2  3  0  1  2  3  0  1   2   3
mirror                  1   2   3   3   2   1   0  0  1  2  3  3  2  1  0  0  1   2   3

0 讨论(0)

查看其它2个回答