cuda - Zero-copy memory, memory-mapped file

前端 未结 1 1548
小蘑菇
小蘑菇 2020-12-18 07:33

I am trying to create a memory-mapped file containing uint32_t values, and then use that mapping as zero-copy pinned memory for CUDA, as shown below. I am getting the c

相关标签:
1条回答
  • 2020-12-18 08:25

    One problem is that the logical sequence of your program is incorrect. This line assigns to data a pointer provided by the CUDA API:

    err = cudaHostAlloc((void**)&data, data_file_size, cudaHostAllocMapped);
    

    This line then overwrites that value with a new one:

    data = (uint32_t*) mmap(0, data_file_size, PROT_READ, MAP_PRIVATE, data_file, 0);
    

    At that point, the value of data is no longer recognized by the CUDA API as pointing to pinned memory, so when you call this:

    err = cudaHostGetDevicePointer((void**)&dev_data, (void*)data, 0);
    

    you get an error, because the value contained in data is not recognized.

    EDIT: (based on this question) Apart from that issue, it seems that if you change the file handling from read-only to read-write, then this process can be made to work (it throws no runtime errors). Here's a complete program (which doesn't contain the above logical flaw) that demonstrates this (I have previously created a test.dat file of size 566316 bytes):

    $ cat t706.cu
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/mman.h>
    #include <sys/stat.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <stdint.h>
    
    /*
     * Maps "test.dat" into host memory with mmap(), registers that mapping with
     * the CUDA runtime as pinned, device-mapped (zero-copy) memory, and obtains
     * the corresponding device pointer.
     *
     * Exits with EXIT_FAILURE if the device cannot map host memory or if any
     * file, mmap, or CUDA call fails; returns 0 on success.
     */
    int main(void)
    {
      struct stat buf;
    
      char *dev_data = NULL;
    
      cudaDeviceProp cuda_prop;
      cudaError_t err = cudaGetDeviceProperties(&cuda_prop, 0);
      if (err != cudaSuccess) {
        fprintf(stderr, "cudaGetDeviceProperties fail: %s\n", cudaGetErrorString(err));
        exit(EXIT_FAILURE);
      }
      if (!cuda_prop.canMapHostMemory)
        exit(EXIT_FAILURE);
    
      /* Request host-memory mapping support before any context-creating call. */
      err = cudaSetDeviceFlags(cudaDeviceMapHost);
      if (err != cudaSuccess) {
        fprintf(stderr, "cudaSetDeviceFlags fail: %s\n", cudaGetErrorString(err));
        exit(EXIT_FAILURE);
      }
    
      /* The file is opened read-write so the mapping can be PROT_READ|PROT_WRITE. */
      int data_file = open("test.dat", O_RDWR);
      if (data_file < 0) {
        perror("open");
        exit(EXIT_FAILURE);
      }
      if (fstat(data_file, &buf) != 0) {
        perror("fstat");
        exit(EXIT_FAILURE);
      }
      /* Keep the size as size_t: an int would truncate files of 2 GiB or more. */
      size_t data_file_size = (size_t) buf.st_size;
      printf("data_file_size = %zu\n", data_file_size);
    
      char *data = (char *) mmap(0, data_file_size, PROT_READ|PROT_WRITE, MAP_PRIVATE, data_file, 0);
      /* mmap() reports failure with MAP_FAILED, not NULL. */
      if (data == (char *) MAP_FAILED) {
        printf("mmap failure\n");
        exit(EXIT_FAILURE);
      }
    
      /* cudaHostRegisterMapped explicitly asks for a device-visible mapping, so
         cudaHostGetDevicePointer() below does not depend on unified addressing. */
      err = cudaHostRegister(data, data_file_size, cudaHostRegisterMapped);
      if (err != cudaSuccess) {
        printf("cudaHostRegister fail\n");
        exit(EXIT_FAILURE);
      }
    
      err = cudaHostGetDevicePointer((void**)&dev_data, (void*)data, 0);
      if (err == cudaErrorMemoryAllocation)
      {
        printf("cudaHostGetDevicePointer - Mem Alloc Err\n");
        exit(EXIT_FAILURE);
      }
      else if (err == cudaErrorInvalidValue)
      {
        printf("cudaHostGetDevicePointer - Invalid Val Err\n");
        exit(EXIT_FAILURE);
      }
      else if (err != cudaSuccess)
      {
        /* Any other error code previously fell through silently. */
        fprintf(stderr, "cudaHostGetDevicePointer fail: %s\n", cudaGetErrorString(err));
        exit(EXIT_FAILURE);
      }
    
      /* Release resources in reverse order of acquisition. */
      err = cudaHostUnregister(data);
      if (err != cudaSuccess)
        fprintf(stderr, "cudaHostUnregister fail: %s\n", cudaGetErrorString(err));
      if (munmap(data, data_file_size) != 0)
        perror("munmap");
      close(data_file);
    
      return 0;
    }
    $ nvcc -arch=sm_30 -o t706 t706.cu
    $ ./t706
    data_file_size = 566316
    $
    
    0 讨论(0)
提交回复
热议问题