How to return a single variable from a CUDA kernel function?

前端 未结 2 1919
梦谈多话
梦谈多话 2021-02-04 03:26

I have a CUDA search function which calculates a single value. How can I return it to the host?

__global__ 
void G_SearchByNameID(node* Node, long nodeCount, long s         


        
相关标签:
2条回答
  • 2021-02-04 03:38

    To get a single result you have to copy it back to the host with cudaMemcpy, i.e.:

    #include <assert.h>
    
    // Kernel: stores the constant result 2 into the location `answer` points to.
    // `answer` must be dereferenceable from device code; every launched thread
    // performs the same store.
    __global__ void g_singleAnswer(long* answer)
    {
      answer[0] = 2;
    }
    
    // Host driver: allocates one long on the device, lets g_singleAnswer fill it,
    // copies the value back and asserts that it equals 2.
    // Returns 0 on success and 1 if any CUDA call reports an error.
    int main(){

      long h_answer = 0;   // host copy of the result; initialized so a failed copy never leaves garbage
      long* d_answer = 0;  // device buffer holding the single result

      if (cudaMalloc(&d_answer, sizeof(long)) != cudaSuccess)
        return 1;

      g_singleAnswer<<<1,1>>>(d_answer);

      // A kernel launch returns nothing; launch failures are only visible here.
      cudaError_t status = cudaGetLastError();
      if (status == cudaSuccess) {
        // Blocking copy: returns once the value is in h_answer, and reports
        // errors raised while the kernel was executing.
        status = cudaMemcpy(&h_answer, d_answer, sizeof(long), cudaMemcpyDeviceToHost);
      }

      // Release the buffer on both the success and the failure path.
      cudaFree(d_answer);

      if (status != cudaSuccess)
        return 1;

      assert(h_answer == 2);
      return 0;
    }
    

    I guess the error comes from passing a long value instead of a pointer to a long value.

    0 讨论(0)
  • 2021-02-04 03:57

    I've been using __device__ variables for this purpose, that way you don't have to bother with cudaMalloc and cudaFree and you don't have to pass a pointer as a kernel argument, which saves you a register in your kernel to boot.

    // Device-side variable that carries the single result: the kernel below
    // assigns to it and main() reads it back with cudaMemcpyFromSymbol.
    __device__ long d_answer;
    
    // Kernel: publishes its result through the __device__ variable d_answer
    // instead of a pointer argument. The value 2 stands in for a real result;
    // every launched thread performs the same store.
    __global__ void G_SearchByNameID()
    {
      const long result = 2;
      d_answer = result;
    }
    
    // Host driver: runs G_SearchByNameID, reads the result back from the
    // __device__ variable d_answer and prints it.
    // Returns 0 on success and 1 if the launch or the copy reports an error.
    int main() {
      // Launch the kernel under the name it is defined with (G_SearchByNameID).
      G_SearchByNameID<<<1,1>>>();

      // A kernel launch returns nothing; launch failures are only visible here.
      if (cudaGetLastError() != cudaSuccess)
        return 1;

      long answer = 0;  // same type as d_answer

      // Pass the symbol itself, not its name as a string: looking symbols up by
      // string was removed in CUDA 5.0 and fails with an invalid-symbol error.
      if (cudaMemcpyFromSymbol(&answer, d_answer, sizeof(answer), 0,
                               cudaMemcpyDeviceToHost) != cudaSuccess)
        return 1;

      // %ld matches long; %d would be a format/argument mismatch.
      printf("answer: %ld\n", answer);
      return 0;
    }
    
    0 讨论(0)
提交回复
热议问题