Thrust transform throws error: “bulk_kernel_by_value: an illegal memory access was encountered”

后端 未结 1 434
谎友^
谎友^ 2021-01-07 09:12

I'm rather new to CUDA/Thrust and have a problem with a code snippet. To make it easier, I have trimmed it down to the bare minimum. The code is the following:



        
相关标签:
1条回答
  • 2021-01-07 09:18

    The address of a __device__ (or __host__ __device__) function cannot be taken in host code for use on the device:

    thrust::transform(X.begin(), X.end(), X.begin(), functor(&g));
                                                             ^
                                                         You will not get the 
                                                         __device__ function
                                                         address here
    

    There are many questions on stackoverflow which discuss usage of CUDA device function addresses passed via kernel calls. This answer links to several which may be of interest.

    One possible approach to fix this would be to acquire the device function address in device code, and pass it to the host, for usage like you are describing:

    $ cat t1057.cu
    #include <thrust/device_vector.h>
    #include <thrust/transform.h>
    #include <thrust/copy.h>
    #include <iostream>
    // Unary callable that stores a pointer to a float(const float&) function
    // and forwards every call to it.
    struct functor {
        // Stores the function pointer that operator() will invoke.
        functor(float (*g)(const float&)) : callee_(g) {}

        // Calls the stored pointer with x and returns its result. The pointer
        // must be valid in whichever execution space this operator runs in.
        __host__ __device__ float operator()(const float& x) const
        {
            return callee_(x);
        }

    private:
        float (*callee_)(const float&);  // wrapped function
    };
    
    // Returns three times x. Compiled for both host and device.
    __host__ __device__ float g(const float& x)
    {
        const float tripled = 3.0f * x;
        return tripled;
    }
    
    // Device-resident function pointer initialised with g. Because this
    // variable lives in device code, it captures the device address of g,
    // which host code cannot obtain by writing &g directly. The host reads
    // this value back with cudaMemcpyFromSymbol.
    __device__ float (*d_g)(const float&) = g;
    
    // Reads the device address of g back from the d_g symbol, uses it to
    // triple every element of a 4-element device vector in place, and prints
    // the elements separated by commas.
    // Returns 0 on success, 1 if the device function pointer cannot be fetched.
    int main(void){
        float (*h_g)(const float&) = NULL;

        // Copy the pointer value stored in d_g to the host. If this fails,
        // h_g stays NULL and the transform below would call through a null
        // pointer on the device, so stop here instead.
        const cudaError_t err = cudaMemcpyFromSymbol(&h_g, d_g, sizeof(h_g));
        if (err != cudaSuccess) {
            std::cerr << "cudaMemcpyFromSymbol failed: "
                      << cudaGetErrorString(err) << std::endl;
            return 1;
        }

        thrust::device_vector<float> X(4,1);
        thrust::transform(X.begin(), X.end(), X.begin(), functor(h_g));
        thrust::copy_n(X.begin(), X.size(), std::ostream_iterator<float>(std::cout, ","));
        std::cout << std::endl;
        return 0;
    }
    $ nvcc -o t1057 t1057.cu -std=c++11
    $ ./t1057
    3,3,3,3,
    $
    

    Another possible approach, leveraging the always-clever work by @m.s. here uses templating:

    $ cat t1057.cu
    #include <thrust/device_vector.h>
    #include <thrust/transform.h>
    #include <thrust/copy.h>
    #include <iostream>
    
    // Pointer to a function taking const float& and returning float.
    using fptr_t = float (*)(const float&);

    // Stateless unary callable whose target function F is fixed at compile
    // time as a template argument, so no function address is carried at
    // run time.
    template <fptr_t F>
    struct functor {
        // Forwards x to F and returns its result.
        __host__ __device__ float operator()(const float& x) const
        {
            return F(x);
        }
    };
    
    // Returns three times x. Compiled for both host and device.
    __host__ __device__ float g(const float& x)
    {
        const float tripled = 3.0f * x;
        return tripled;
    }
    
    
    // Triples every element of a 4-element device vector in place using
    // functor<g>, then prints the elements separated by commas.
    int main(void)
    {
        thrust::device_vector<float> X(4, 1);

        // Apply g to each element, writing the result back over the input.
        functor<g> op;
        thrust::transform(X.begin(), X.end(), X.begin(), op);

        // Stream the device data to stdout with a trailing comma per element.
        std::ostream_iterator<float> out(std::cout, ",");
        thrust::copy_n(X.begin(), X.size(), out);
        std::cout << std::endl;
    }
    $ nvcc -o t1057 t1057.cu -std=c++11
    $ ./t1057
    3,3,3,3,
    $
    
    0 讨论(0)
提交回复
热议问题