#include <stdio.h>
#include <string.h>
#include <cuda.h>
#include <cuda_runtime.h>

__global__ void setVal(char **c){
    c[(blockIdx.y * gridDim.x) + blockIdx.x] = "4";
}
The main problem with your code is that you're not allocating any device memory for the setValues call. You can't pass it a pointer to host memory (char *x[6]) and expect that to work; CUDA kernels have to operate on device memory. So you allocate that memory, operate on it in the kernel, and then copy the result back to the host:
#include <stdio.h>
#include <string.h>
#include <cuda.h>
#include <cuda_runtime.h>

// Each block writes '4' into its own slot of the device array.
__global__ void setValues(char *arr){
    arr[blockIdx.y * gridDim.x + blockIdx.x] = '4';
}

int main() {
    const int NCHARS = 6;
    char *xd;

    // Allocate device memory for the six characters the kernel will write.
    cudaMalloc(&xd, NCHARS);

    dim3 grid(3, 2);
    setValues<<<grid, 1>>>(xd);

    // Copy the result back to host memory and terminate it as a C string.
    char *p = (char*) malloc(20 * sizeof(char));
    strcpy(p, "");
    cudaMemcpy(p, xd, NCHARS, cudaMemcpyDeviceToHost);
    p[NCHARS] = '\0';

    printf("<%s>\n", p);
    getchar();

    cudaFree(xd);
    return 0;
}
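One thing the code above never checks is whether the allocation and the launch actually succeeded; with the 3x2 grid the printf should show <444444> when they do. A minimal, hypothetical addition (meant to replace the bare kernel launch in the program above, not a separate program) could look like this:

// Hypothetical error checking around the launch in the program above.
setValues<<<grid, 1>>>(xd);
cudaError_t err = cudaGetLastError();      // catches launch-configuration errors
if (err == cudaSuccess)
    err = cudaDeviceSynchronize();         // catches errors raised while the kernel runs
if (err != cudaSuccess)
    fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));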
There are several problems I'm seeing here. Here are some of the most obvious ones:

First, my guess is that the character string constant "4" is stored in host (CPU) memory, so you would have to copy it explicitly to device (global) memory. Once the string "4" is in device memory, you can store a pointer to it in a device memory value, such as an element of the array arr.

Second, the array x you pass to the setValues kernel is also in host memory. Remember that you need to use cudaMalloc to allocate a (global) device memory region, which an on-device kernel can then point to.
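For what it's worth, here is a minimal sketch of those two fixes applied to the question's setVal kernel. The extra d_four parameter is my own addition (one way to get the device-resident string pointer into the kernel), and the 3x2 grid and six-pointer array are assumptions taken from the code above:

#include <stdio.h>
#include <cuda_runtime.h>

// Illustrative kernel: every block stores the same device-resident string
// pointer into its slot of the device pointer array. The d_four parameter
// is an addition for illustration, not part of the original question.
__global__ void setVal(char **c, char *d_four)
{
    c[blockIdx.y * gridDim.x + blockIdx.x] = d_four;
}

int main()
{
    // Fix 1: copy the string constant "4" into device (global) memory.
    char *d_four = NULL;
    cudaMalloc((void**)&d_four, 2);
    cudaMemcpy(d_four, "4", 2, cudaMemcpyHostToDevice);

    // Fix 2: allocate the pointer array itself on the device.
    char **d_c = NULL;
    cudaMalloc((void**)&d_c, 6 * sizeof(char*));

    dim3 grid(3, 2);
    setVal<<<grid, 1>>>(d_c, d_four);

    // Reading the strings back takes two copies: first the device pointers,
    // then the characters each one points to.
    char *x[6];
    cudaMemcpy(x, d_c, 6 * sizeof(char*), cudaMemcpyDeviceToHost);
    char s[2];
    cudaMemcpy(s, x[0], 2, cudaMemcpyDeviceToHost);
    printf("%s\n", s);

    cudaFree(d_four);
    cudaFree(d_c);
    return 0;
}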
Try this -- I tested it on a GTX 285 under CUDA 3.2 -- so it's a bit more restrictive than the current version, but it works.
#include <stdio.h>
#include <string.h>

__global__ void setValues(char** word)
{
    volatile char* myWord = word[blockIdx.x];
    myWord[0] = 'H';
    myWord[1] = 'o';
    myWord[2] = 'l';
    myWord[3] = 'a';
    myWord[4] = '\0';
}

int main()
{
    const size_t bufferSize = 32;
    const int nObjects = 10;

    char*  h_x[nObjects];
    char** d_x = 0;

    cudaMalloc( (void**)(&d_x), nObjects * sizeof(char*) );

    for ( int i = 0; i < nObjects; i++ )
    {
        h_x[i] = NULL;
        cudaMalloc( (void**)(&h_x[i]), bufferSize * sizeof(char) );
        printf("h_x[%d] = %lx\n", i, (unsigned long)h_x[i]);
    }
    cudaMemcpy( d_x, h_x, nObjects*sizeof(char*), cudaMemcpyHostToDevice );
    printf("Copied h_x[] to d_x[]\n");

    char msg[] = "Hello World!";
    cudaMemcpy( h_x[0], msg, 13*sizeof(char), cudaMemcpyHostToDevice );

    /* Force Thread Synchronization */
    cudaError err = cudaThreadSynchronize();

    /* Check for and display Error */
    if ( cudaSuccess != err )
    {
        fprintf( stderr, "Cuda error in file '%s' in line %i : %s.\n",
                 __FILE__, __LINE__, cudaGetErrorString( err ) );
    }

    setValues<<<nObjects,1>>>(d_x);

    /* Force Thread Synchronization */
    err = cudaThreadSynchronize();

    /* Check for and display Error */
    if ( cudaSuccess != err )
    {
        fprintf( stderr, "Cuda error in file '%s' in line %i : %s.\n",
                 __FILE__, __LINE__, cudaGetErrorString( err ) );
    }
    printf("Kernel Completed Successfully. Woot.\n\n");

    char p[bufferSize];
    printf("d_x = %lx\n", (unsigned long)d_x );
    printf("h_x = %lx\n", (unsigned long)h_x );

    cudaMemcpy( h_x, d_x, nObjects*sizeof(char*), cudaMemcpyDeviceToHost );
    printf("d_x = %lx\n", (unsigned long)d_x );
    printf("h_x = %lx\n", (unsigned long)h_x );

    for ( int i = 0; i < nObjects; i++ )
    {
        cudaMemcpy( &p, h_x[i], bufferSize*sizeof(char), cudaMemcpyDeviceToHost );
        printf("%d p[] = %s\n", i, p);
    }

    /* Force Thread Synchronization */
    err = cudaThreadSynchronize();

    /* Check for and display Error */
    if ( cudaSuccess != err )
    {
        fprintf( stderr, "Cuda error in file '%s' in line %i : %s.\n",
                 __FILE__, __LINE__, cudaGetErrorString( err ) );
    }

    getchar();
    return 0;
}
As @Jon notes, you can't pass x (as you had declared it) to the GPU, because it's an address that lives on the CPU. In the code above, I cudaMalloc a buffer for each string, keep those device pointers in the host array h_x, and copy that array of pointers into d_x, a char** that is also allocated on the GPU. Hope this helps!
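One detail the example above leaves out is cleanup. Since h_x[i] still holds the device pointers after the copy back, something along these lines (an untested sketch) at the end of main would release everything:

// Sketch of the missing cleanup: every per-object buffer, and the pointer
// array itself, came from cudaMalloc, so release them with cudaFree.
for ( int i = 0; i < nObjects; i++ )
    cudaFree( h_x[i] );
cudaFree( d_x );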