How to remove CL_INVALID_PLATFORM error in opencl code?

家住魔仙堡 提交于 2020-01-04 08:05:47

问题


Doing simple matrix multiplication using OpenCL:

// Multiply two matrices A * B = C

#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <oclUtils.h>

#define WA 3
#define HA 3
#define WB 3
#define HB 3
#define WC 3
#define HC 3

// Allocates a matrix with random float entries.
void randomInit(float* data, int size)
{
   for (int i = 0; i < size; ++i)
   data[i] = rand() / (float)RAND_MAX;
}

/////////////////////////////////////////////////////////
// Program main
/////////////////////////////////////////////////////////

int
main(int argc, char** argv)
{

   // set seed for rand()
   srand(2006);

   // 1. allocate host memory for matrices A and B
   unsigned int size_A = WA * HA;
   unsigned int mem_size_A = sizeof(float) * size_A;
   float* h_A = (float*) malloc(mem_size_A);

   unsigned int size_B = WB * HB;
   unsigned int mem_size_B = sizeof(float) * size_B;
   float* h_B = (float*) malloc(mem_size_B);

   // 2. initialize host memory
   randomInit(h_A, size_A);
   randomInit(h_B, size_B);

   // 3. print out A and B
   printf("\n\nMatrix A\n");
   for(int i = 0; i < size_A; i++)
   {
      printf("%f ", h_A[i]);
      if(((i + 1) % WA) == 0)
      printf("\n");
   }

   printf("\n\nMatrix B\n");
   for(int i = 0; i < size_B; i++)
   {
      printf("%f ", h_B[i]);
      if(((i + 1) % WB) == 0)
      printf("\n");
   }

   // 4. allocate host memory for the result C
   unsigned int size_C = WC * HC;
   unsigned int mem_size_C = sizeof(float) * size_C;
   float* h_C = (float*) malloc(mem_size_C);

   // 5. Initialize OpenCL
   // OpenCL specific variables
   cl_context clGPUContext;
   cl_command_queue clCommandQue;
   cl_program clProgram;
   cl_kernel clKernel;

   size_t dataBytes;
   size_t kernelLength;
   cl_int errcode;

   // OpenCL device memory for matrices
   cl_mem d_A;
   cl_mem d_B;
   cl_mem d_C;

   /*****************************************/
   /* Initialize OpenCL */
   /*****************************************/

   clGPUContext = clCreateContextFromType(0, 
                   CL_DEVICE_TYPE_GPU, 
                   NULL, NULL, &errcode);
   shrCheckError(errcode, CL_SUCCESS);

   // get the list of GPU devices associated 
   // with context
   errcode = clGetContextInfo(clGPUContext, 
              CL_CONTEXT_DEVICES, 0, NULL, 
              &dataBytes);
   cl_device_id *clDevices = (cl_device_id *)
              malloc(dataBytes);
   errcode |= clGetContextInfo(clGPUContext, 
              CL_CONTEXT_DEVICES, dataBytes, 
              clDevices, NULL);
   //shrCheckError(errcode, CL_SUCCESS);

   //Create a command-queue
   clCommandQue = clCreateCommandQueue(clGPUContext, 
                  clDevices[0], 0, &errcode);
   //shrCheckError(errcode, CL_SUCCESS);

   // Setup device memory
   d_C = clCreateBuffer(clGPUContext, 
          CL_MEM_READ_WRITE, 
          mem_size_A, NULL, &errcode);
   d_A = clCreateBuffer(clGPUContext, 
          CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, 
          mem_size_A, h_A, &errcode);
   d_B = clCreateBuffer(clGPUContext, 
          CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, 
          mem_size_B, h_B, &errcode);


   // 6. Load and build OpenCL kernel
   char *clMatrixMul = oclLoadProgSource("kernel.cl",
                        "// My comment\n", 
                        &kernelLength);
   //shrCheckError(clMatrixMul != NULL, shrTRUE);

   clProgram = clCreateProgramWithSource(clGPUContext, 
                1, (const char **)&clMatrixMul, 
                &kernelLength, &errcode);
   //shrCheckError(errcode, CL_SUCCESS);

   errcode = clBuildProgram(clProgram, 0, 
              NULL, NULL, NULL, NULL);
   //shrCheckError(errcode, CL_SUCCESS);

   clKernel = clCreateKernel(clProgram, 
               "matrixMul", &errcode);
   //shrCheckError(errcode, CL_SUCCESS);


   // 7. Launch OpenCL kernel
   size_t localWorkSize[2], globalWorkSize[2];

   int wA = WA;
   int wC = WC;
   errcode = clSetKernelArg(clKernel, 0, 
              sizeof(cl_mem), (void *)&d_C);
   errcode |= clSetKernelArg(clKernel, 1, 
              sizeof(cl_mem), (void *)&d_A);
   errcode |= clSetKernelArg(clKernel, 2, 
              sizeof(cl_mem), (void *)&d_B);
   errcode |= clSetKernelArg(clKernel, 3, 
              sizeof(int), (void *)&wA);
   errcode |= clSetKernelArg(clKernel, 4, 
              sizeof(int), (void *)&wC);
   //shrCheckError(errcode, CL_SUCCESS);

   localWorkSize[0] = 3;
   localWorkSize[1] = 3;
   globalWorkSize[0] = 3;
   globalWorkSize[1] = 3;

   errcode = clEnqueueNDRangeKernel(clCommandQue, 
              clKernel, 2, NULL, globalWorkSize, 
              localWorkSize, 0, NULL, NULL);
   //shrCheckError(errcode, CL_SUCCESS);

   // 8. Retrieve result from device
   errcode = clEnqueueReadBuffer(clCommandQue, 
              d_C, CL_TRUE, 0, mem_size_C, 
              h_C, 0, NULL, NULL);
   //shrCheckError(errcode, CL_SUCCESS);

   // 9. print out the results
   printf("\n\nMatrix C (Results)\n");
   for(int i = 0; i < size_C; i++)
   {
      printf("%f ", h_C[i]);
      if(((i + 1) % WC) == 0)
      printf("\n");
   }
   printf("\n");

   // 10. clean up memory
   free(h_A);
   free(h_B);
   free(h_C);

   clReleaseMemObject(d_A);
   clReleaseMemObject(d_C);
   clReleaseMemObject(d_B);

   free(clDevices);
   free(clMatrixMul);
   clReleaseContext(clGPUContext);
   clReleaseKernel(clKernel);
   clReleaseProgram(clProgram);
   clReleaseCommandQueue(clCommandQue);

}

In the above code I keep getting error at the place :

/**********************/ / Initialize OpenCL / /**********************/ clGPUContext = clCreateContextFromType(0, CL_DEVICE_TYPE_GPU, NULL, NULL, &errcode); shrCheckError(errcode, CL_SUCCESS);

The error code being returned is -32 that means: CL_INVALID_PLATFORM"

How do I remove this error?

OS: Windows 7, 32 bit, NVIDIA GPU GeForce 610


回答1:


The Nvidia drivers expect you to provide a non-NULL properties pointer as first argument to the clCreateContextFromType call.

The Khronos specification for clCreateContextFromType states that if NULL is passed for the properties parameter, the platform that is selected is implementation dependent. In case of Nvidia the choice seems to be that no platform at all is selected if a NULL pointer is passed. See clCreateContextFromType for more information.

On the other hand, this behavior is consistent with Issue #3 in the cl_khr_icd extension, which would apply if you are using OpenCL through the ICD, and which states:

3: How will the ICD handle a NULL cl_platform_id?

RESOLVED: The NULL platform is not supported by the ICD.

To pass the properties to clCreateContextFromType, first query the platforms with clGetPlatformIDs. Then construct a properties array with the desired platform ID and pass it to clCreateContextFromType. Something along the following lines should work with a C99 compliant compiler:

   // query the number of platforms
   cl_uint numPlatforms;
   errcode = clGetPlatformIDs(0, NULL, &numPlatforms);
   shrCheckError(errcode, CL_SUCCESS); 

   // now get all the platform IDs
   cl_platform_id platforms[numPlatforms];
   errcode = clGetPlatformIDs(numPlatforms, platforms, NULL);
   shrCheckError(errcode, CL_SUCCESS);

   // set platform property - we just pick the first one
   cl_context_properties properties[] = {CL_CONTEXT_PLATFORM, (int) platforms[0], 0};
   clGPUContext = clCreateContextFromType(properties, CL_DEVICE_TYPE_GPU, NULL, NULL, &errcode);
   shrCheckError(errcode, CL_SUCCESS);


来源:https://stackoverflow.com/questions/19140989/how-to-remove-cl-invalid-platform-error-in-opencl-code

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!