问题
I am using a CUDA texture in border addressing mode (cudaAddressModeBorder
). I am reading texture coordinates using tex2D<float>()
. When the texture coordinates fall outside the texture, tex2D<float>()
returns 0
.
How can I change this returned border value from 0
to something else? I could check the texture coordinate manually and set the border value myself. I was wondering if there was CUDA API where I can set such a border value.
回答1:
As mentioned by sgarizvi, CUDA supports only four, non-customizable address modes, namely, clamp, border, wrap and mirror, which are described in Section 3.2.11.1. of the CUDA programming guide.
The former two work in both unnormalized and normalized coordinates, while the latter two in normalized coordinates only.
To describe the first two, let us consider the unnormalized coordinates case and consider 1D signals, for the sake of simplicity. In this case, the input sequence is c[k]
, with k=0,...,M-1
.
cudaAddressModeClamp
The signal c[k]
is continued outside k=0,...,M-1
so that c[k] = c[0]
for k < 0
, and c[k] = c[M-1]
for k >= M
.
cudaAddressModeBorder
The signal c[k]
is continued outside k=0,...,M-1
so that c[k] = 0
for k < 0
and for k >= M
.
Now, to describe the last two address modes, we are forced to consider normalized coordinates, so that the 1D input signal samples are assumed to be c[k / M]
, with k=0,...,M-1
.
cudaAddressModeWrap
The signal c[k / M]
is continued outside k=0,...,M-1
so that it is periodic with period equal to M
. In other words, c[(k + p * M) / M] = c[k / M]
for any (positive, negative or vanishing) integer p
.
cudaAddressModeMirror
The signal c[k / M]
is continued outside k=0,...,M-1
so that it is periodic with period equal to 2 * M - 2
. In other words, c[l / M] = c[k / M]
for any l
and k
such that (l + k)mod(2 * M - 2) = 0
.
The following code illustrates all the four available address modes
#include <stdio.h>
texture<float, 1, cudaReadModeElementType> texture_clamp;
texture<float, 1, cudaReadModeElementType> texture_border;
texture<float, 1, cudaReadModeElementType> texture_wrap;
texture<float, 1, cudaReadModeElementType> texture_mirror;
/********************/
/* CUDA ERROR CHECK */
/********************/
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) exit(code);
}
}
/******************************/
/* CUDA ADDRESS MODE CLAMPING */
/******************************/
__global__ void Test_texture_clamping(const int M) {
printf("Texture clamping - i = %i; value = %f\n", -threadIdx.x, tex1D(texture_clamp, -(float)threadIdx.x));
printf("Texture clamping - i = %i; value = %f\n", M + threadIdx.x, tex1D(texture_clamp, (float)(M + threadIdx.x)));
}
/****************************/
/* CUDA ADDRESS MODE BORDER */
/****************************/
__global__ void Test_texture_border(const int M) {
printf("Texture border - i = %i; value = %f\n", -threadIdx.x, tex1D(texture_border, -(float)threadIdx.x));
printf("Texture border - i = %i; value = %f\n", M + threadIdx.x, tex1D(texture_border, (float)(M + threadIdx.x)));
}
/**************************/
/* CUDA ADDRESS MODE WRAP */
/**************************/
__global__ void Test_texture_wrap(const int M) {
printf("Texture wrap - i = %i; value = %f\n", -threadIdx.x, tex1D(texture_wrap, -(float)threadIdx.x/(float)M));
printf("Texture wrap - i = %i; value = %f\n", M + threadIdx.x, tex1D(texture_wrap, (float)(M + threadIdx.x)/(float)M));
}
/****************************/
/* CUDA ADDRESS MODE MIRROR */
/****************************/
__global__ void Test_texture_mirror(const int M) {
printf("Texture mirror - i = %i; value = %f\n", -threadIdx.x, tex1D(texture_mirror, -(float)threadIdx.x/(float)M));
printf("Texture mirror - i = %i; value = %f\n", M + threadIdx.x, tex1D(texture_mirror, (float)(M + threadIdx.x)/(float)M));
}
/********/
/* MAIN */
/********/
void main(){
const int M = 4;
// --- Host side memory allocation and initialization
float *h_data = (float*)malloc(M * sizeof(float));
for (int i=0; i<M; i++) h_data[i] = (float)i;
// --- Texture clamping
cudaArray* d_data_clamping = NULL; gpuErrchk(cudaMallocArray(&d_data_clamping, &texture_clamp.channelDesc, M, 1));
gpuErrchk(cudaMemcpyToArray(d_data_clamping, 0, 0, h_data, M * sizeof(float), cudaMemcpyHostToDevice));
cudaBindTextureToArray(texture_clamp, d_data_clamping);
texture_clamp.normalized = false;
texture_clamp.addressMode[0] = cudaAddressModeClamp;
dim3 dimBlock(2 * M, 1); dim3 dimGrid(1, 1);
Test_texture_clamping<<<dimGrid,dimBlock>>>(M);
printf("\n\n\n");
// --- Texture border
cudaArray* d_data_border = NULL; gpuErrchk(cudaMallocArray(&d_data_border, &texture_border.channelDesc, M, 1));
gpuErrchk(cudaMemcpyToArray(d_data_border, 0, 0, h_data, M * sizeof(float), cudaMemcpyHostToDevice));
cudaBindTextureToArray(texture_border, d_data_border);
texture_border.normalized = false;
texture_border.addressMode[0] = cudaAddressModeBorder;
Test_texture_border<<<dimGrid,dimBlock>>>(M);
printf("\n\n\n");
// --- Texture wrap
cudaArray* d_data_wrap = NULL; gpuErrchk(cudaMallocArray(&d_data_wrap, &texture_wrap.channelDesc, M, 1));
gpuErrchk(cudaMemcpyToArray(d_data_wrap, 0, 0, h_data, M * sizeof(float), cudaMemcpyHostToDevice));
cudaBindTextureToArray(texture_wrap, d_data_wrap);
texture_wrap.normalized = true;
texture_wrap.addressMode[0] = cudaAddressModeWrap;
Test_texture_wrap<<<dimGrid,dimBlock>>>(M);
printf("\n\n\n");
// --- Texture mirror
cudaArray* d_data_mirror = NULL; gpuErrchk(cudaMallocArray(&d_data_mirror, &texture_mirror.channelDesc, M, 1));
gpuErrchk(cudaMemcpyToArray(d_data_mirror, 0, 0, h_data, M * sizeof(float), cudaMemcpyHostToDevice));
cudaBindTextureToArray(texture_mirror, d_data_mirror);
texture_mirror.normalized = true ;
texture_mirror.addressMode[0] = cudaAddressModeMirror;
Test_texture_mirror<<<dimGrid,dimBlock>>>(M);
printf("\n\n\n");
}
Those are the outputs
index -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 10 11
clamp 0 0 0 0 0 0 0 0 1 2 3 3 3 3 3 3 3 3 3
border 0 0 0 0 0 0 0 0 1 2 3 0 0 0 0 0 0 0 0
wrap 1 2 3 0 1 2 3 0 1 2 3 0 1 2 3 0 1 2 3
mirror 1 2 3 3 2 1 0 0 1 2 3 3 2 1 0 0 1 2 3
回答2:
As of now (CUDA 5.5), the CUDA texture fetch behavior is not customizable. Only 1 of the 4 automatic built-in modes (i.e. Border, Clamp, Wrap and Mirror) can be utilized for out of range texture fetch.
来源:https://stackoverflow.com/questions/19020963/the-different-addressing-modes-of-cuda-textures