How to bind a float* array to a 1D texture in cuda?

前端 未结 1 1263
孤独总比滥情好
孤独总比滥情好 2021-01-27 08:25

I am trying to understand how to use the texture memory by binding it to a linear device array (not a cudaArray). My code is simple (below). I have a float* array of 8 numbers w

相关标签:
1条回答
  • 2021-01-27 08:49

    According to the documentation, tex1D() is used when the underlying allocation is a CUDA Array. For linear-memory bound textures, the correct texturing function is tex1Dfetch().

    That modification (only) to your code makes it work for me:

    $ cat t1139.cu
    #include <stdio.h>
    #include <helper_cuda.h>
    
    texture<float, 1, cudaReadModeElementType> texInput;
    
    /**
     * Copies the first n texels of texInput into output.
     *
     * texInput is fetched with tex1Dfetch(), i.e. with an integer element
     * index, which is the fetch function for textures bound to linear device
     * memory (as opposed to tex1D(), which is for CUDA arrays).
     *
     * The loop starts at the thread's global index and advances by the total
     * number of launched threads, so any 1D launch configuration covers every
     * index in [0, n) exactly once. With the <<<1,1>>> launch used by main()
     * this degenerates to a single thread visiting 0..n-1 in order.
     *
     * @param output  destination buffer; must hold at least n floats
     * @param n       number of elements to copy; n <= 0 copies nothing
     */
    __global__ void copyKernel(float*output, int n) {
        const int stride = gridDim.x * blockDim.x;
        for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += stride) {
            output[i] = tex1Dfetch(texInput, i);
        }
    }
    
    /**
     * Host driver: fills an 8-element float array, copies it to the device,
     * binds the device buffer to the 1D texture reference texInput, runs
     * copyKernel to read it back through the texture, and prints both the
     * input and the output.
     *
     * Every CUDA runtime call is wrapped in checkCudaErrors() so a failure is
     * reported at the call that caused it instead of surfacing later as
     * garbage output.
     *
     * NOTE(review): texture references (texture<>, cudaBindTexture) are a
     * legacy API; newer CUDA toolkits require texture objects instead —
     * confirm against the toolkit version in use.
     *
     * @return 0 on success, 1 if a host allocation fails
     */
    int main(int argc, char*argv[]) {

        const int WIDTH = 8;
        const size_t bytes = sizeof(float) * WIDTH;

        float* hInput = (float*)malloc(bytes);
        float* hOutput = (float*)malloc(bytes);
        if (hInput == NULL || hOutput == NULL) {
            fprintf(stderr, "host allocation failed\n");
            free(hInput);   // free(NULL) is a no-op
            free(hOutput);
            return 1;
        }

        for (int i = 0; i < WIDTH; i++) {
            hInput[i] = (float)i;
        }

        float* dInput = NULL, *dOutput = NULL;

        // Receives the byte offset that must be applied to fetches when the
        // bound address is not suitably aligned.
        size_t offset = 0;

        texInput.addressMode[0] = cudaAddressModeBorder;
        texInput.addressMode[1] = cudaAddressModeBorder;
        texInput.filterMode = cudaFilterModePoint;
        texInput.normalized = false;

        checkCudaErrors(cudaMalloc((void**)&dInput, bytes));
        checkCudaErrors(cudaMalloc((void**)&dOutput, bytes));

        checkCudaErrors(cudaMemcpy(dInput, hInput, bytes, cudaMemcpyHostToDevice));

        // Bind the linear device buffer (not a cudaArray) to the texture.
        checkCudaErrors(cudaBindTexture(&offset, texInput, dInput, bytes));

        copyKernel<<<1,1>>>(dOutput, WIDTH);
        // A kernel launch returns no status; pick up launch errors explicitly.
        checkCudaErrors(cudaGetLastError());

        // Blocking copy: also waits for the kernel and reports execution errors.
        checkCudaErrors(cudaMemcpy(hOutput, dOutput, bytes, cudaMemcpyDeviceToHost));

        printf("\nInput = ");
        for (int i = 0; i < WIDTH; i++) {
            printf("%f\t",hInput[i]);
        }
        printf("\nOutput = ");
        for (int i = 0; i < WIDTH; i++) {
            printf("%f\t",hOutput[i]);
        }

        // Release the texture binding and all buffers.
        checkCudaErrors(cudaUnbindTexture(texInput));
        checkCudaErrors(cudaFree(dInput));
        checkCudaErrors(cudaFree(dOutput));
        free(hInput);
        free(hOutput);

        return 0;
    }
    $ nvcc -I/usr/local/cuda/samples/common/inc t1139.cu -o t1139
    $ cuda-memcheck ./t1139
    ========= CUDA-MEMCHECK
    
    Input = 0.000000        1.000000        2.000000        3.000000        4.000000        5.000000        6.000000        7.000000
    Output = 0.000000       1.000000        2.000000        3.000000        4.000000        5.000000        6.000000        7.000000        ========= ERROR SUMMARY: 0 errors
    $
    
    0 讨论(0)
提交回复
热议问题