Code terminates after saying COULD NOT CREATE KERNEL on Eclipse

会有一股神秘感。 提交于 2019-12-12 04:20:02

问题


I am trying to translate the sequential C code of an MJPEG decoder into OpenCL. I got the C code from this github project.

I am now trying to convert the original C code for IDCT into OpenCL.

I copied the IDCT code from the .c file and pasted it into my .cl file, which I named invCosine.cl.

invCosine.cl:

       /* IDCT_INT_MIN is written in terms of IDCT_INT_MAX, which is only
        * #defined on the following line; that is fine because a macro body is
        * expanded where the macro is used, not where it is defined. */
       #define  IDCT_INT_MIN   (- IDCT_INT_MAX - 1)
        #define  IDCT_INT_MAX   2147483647

        /*
         * Useful constants:
         */

        /*
         * ck = cos(k*pi/16) = s(8-k) = sin((8-k)*pi/16), multiplied by
         * (1 << C_BITS) and rounded.
         * NOTE(review): C_BITS is not shown in this excerpt; c0_1 == 16384
         * == 1 << 14 suggests C_BITS is 14 -- confirm against the elided part.
         * The *_s2 values appear to be the matching *_1 values times sqrt(2).
         */
        #define c0_1  16384
        #define c0_s2 23170
        #define c1_1  16069
        #define c1_s2 22725

    ....
    ....
    ....
    ....
    ....
    ....

/*
 * One 1-D inverse DCT pass over the eight coefficients Y[0..7].
 * The input is read from Y in stage 1, carried through the temporaries
 * z1 -> z2 -> z3, and the result is written back into Y[0..7] in stage 4.
 *
 * but(), rot(), CMUL and sqrt2 are not visible in this excerpt (they are in
 * the elided "...." part of the file). but() receives plain lvalues for its
 * outputs while rot() receives addresses -- presumably but() is a macro that
 * assigns to its last two arguments; verify against its definition.
 *
 * NOTE(review): cl_int is a host-side type from the OpenCL headers and does
 * not exist in OpenCL C, so this kernel does not build as written; device
 * code must use int. This is the cause identified in the answer further down.
 */
__kernel void idct1D(__global cl_int *Y) {
    cl_int z1[8], z2[8], z3[8];

    /*
     * Stage 1: consumes all eight inputs Y[0..7] and fills z1[0..7].
     */

    but(Y[0], Y[4], z1[1], z1[0]);
    rot(1, 6, Y[2], Y[6], &z1[2], &z1[3]);
    but(Y[1], Y[7], z1[4], z1[7]);
    z1[5] = CMUL(sqrt2, Y[3]);
    z1[6] = CMUL(sqrt2, Y[5]);

    /*
     * Stage 2: four but() steps combine pairs of z1 into z2[0..7].
     */
    but(z1[0], z1[3], z2[3], z2[0]);
    but(z1[1], z1[2], z2[2], z2[1]);
    but(z1[4], z1[6], z2[6], z2[4]);
    but(z1[7], z1[5], z2[5], z2[7]);

    /*
     * Stage 3: z2[0..3] are copied through unchanged; z2[4..7] go through
     * two rot() steps.
     */
    z3[0] = z2[0];
    z3[1] = z2[1];
    z3[2] = z2[2];
    z3[3] = z2[3];
    rot(0, 3, z2[4], z2[7], &z3[4], &z3[7]);
    rot(0, 1, z2[5], z2[6], &z3[5], &z3[6]);

    /*
     * Final stage 4: pairs z3[i] with z3[7-i] and writes the results back
     * into Y[7-i] and Y[i].
     */
    but(z3[0], z3[7], Y[7], Y[0]);
    but(z3[1], z3[6], Y[6], Y[1]);
    but(z3[2], z3[5], Y[5], Y[2]);
    but(z3[3], z3[4], Y[4], Y[3]);
} 

I essentially copied and pasted the #define statements from original file called idct.c. I also copied and pasted the code for the kernel from the same .c file and just added the keywords __kernel, __global and cl_int to this kernel. At the moment, I do not want to write an optimized OpenCL code. I simply want to offload the IDCT computations to the GPU.

My host code in the main file is as follows:

//////////////OpenCL parameters/////////////////////////////
// Host-side state for the OpenCL setup below, followed by reading the kernel
// source file into memory. This is a fragment of a larger function whose
// header is not shown.

            int out;
            cl_platform_id platform_id;
            cl_uint ret_num_platforms;
            cl_device_id device_id;
            cl_uint ret_num_devices;
            cl_context context;
            cl_command_queue command_queue;
            cl_program program;
            //size_t kernel_code_size;
            int *result;
            cl_int ret;      /* status code reused by every cl* call below */
            cl_kernel kernel;
            int storeResult;
            /* NOTE(review): out, result and storeResult are not used anywhere
             * in the fragment shown -- presumably used further down. */


                            FILE *fp;
                            const char fileName[] = "/root/Downloads/tima_seq_version/src/invCosine.cl";
                            size_t source_size;
                            char *source_str;

                         /* Load kernel source file */
                         fp = fopen(fileName, "rb");
                         if (!fp) {
                             fprintf(stderr, "Failed to load kernel.\n");
                             exit(1);
                         }


                         /* NOTE(review): the malloc result is not checked, and
                          * a file longer than MAX_SOURCE_SIZE is silently
                          * truncated. The buffer is not NUL-terminated; the
                          * byte count from fread is kept in source_size. */
                         source_str = (char *)malloc(MAX_SOURCE_SIZE);
                     source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp);
                     fclose(fp);

        /////////Set platform, context, command-queue.........../////////////////////////
        // Picks the first platform and one GPU device on it, then creates a
        // context and a command queue for that device.

            /* Get Platform */
              /* NOTE(review): ret is assigned but not inspected here; only the
               * platform count is checked. */
              ret= clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
               if (ret_num_platforms == 0)
                  {
                      printf("Found 0 platforms!\n");
                  return EXIT_FAILURE;
                  }
               /* Get Device */
               /* Requests a single GPU device; ret_num_devices receives the
                * device count that is printed below. ret is again not checked. */
               ret= clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, &ret_num_devices);
               printf("Number of devices: %d\n", ret_num_devices);

               /* Create Context */
               context = clCreateContext(0, 1, &device_id, NULL, NULL, &ret);

               if (!context)
               {
                   printf("NO cCONTEXT\n");
                   return EXIT_FAILURE;

               }

               /* Create Command Queue */
               command_queue = clCreateCommandQueue(context, device_id, 0, &ret);

               if (!command_queue)
               {
                   printf("NO command queue\n");
                   return EXIT_FAILURE;

               }

                /* Create kernel from source */
                          /* The source text and its length come from the file
                           * read above; ret receives the creation status. */
                          program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret);

                          if (!program)
                          {
                              printf("NO PROGRAM!!!!\n");
                              return EXIT_FAILURE;
                          }

                          /* NOTE(review): the return value of clBuildProgram is
                           * discarded. ret therefore still holds the status of
                           * clCreateProgramWithSource, so the check below never
                           * sees a build failure and the build log is never
                           * printed. Assigning ret = clBuildProgram(...) is the
                           * fix given in the answer further down. */
                          clBuildProgram(program, 1, &device_id, "", NULL, NULL);

                       if (ret != CL_SUCCESS) {
                              printf("building program failed\n");
                              if (ret == CL_BUILD_PROGRAM_FAILURE) {
                                  /* First call asks only for the log size, the
                                   * second one fetches the log text. */
                                  size_t log_size;
                                  clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
                                  /* NOTE(review): malloc result is unchecked and
                                   * log is never freed. */
                                  char *log = (char *) malloc(log_size);
                                  clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
                                  printf("%s\n", log);
                              }
                              /* Execution continues to clCreateKernel even when
                               * this branch reports a failure. */
                          }

                 /* Looks up the kernel by name in the program; this is where
                  * the run in the question stops. */
                 kernel= clCreateKernel(program, "idct1D", &ret);

               if( !kernel || ret != CL_SUCCESS)
               {
                   printf("-----COULD NOT CREATE KERNEL!!---\n");
                   exit(1);
               }

When I run the application, I get the following output:

Number of devices: 2
-----COULD NOT CREATE KERNEL!!---

In order to understand why the kernel is not being detected, I removed all my idct code and copied and pasted the kernel code from this link into my .cl file:

/*
 * Adds four elements of A and B into C: the ones at offsets 0, 4, 8 and 12
 * from a fixed base of 0. All other elements of C are left untouched.
 */
__kernel void taskParallelAdd(__global float* A, __global float* B, __global float* C)
{
    const int base = 0;

    /* Visit offsets 0, 4, 8, 12 in ascending order. */
    for (int offset = 0; offset < 16; offset += 4) {
        const int idx = base + offset;
        C[idx] = A[idx] + B[idx];
    }
}

When I ran the application again, it ran smoothly and did not produce the same error.

Why is my idct kernel not being detected?

P.S. I am using the Eclipse IDE for writing and running my code.


回答1:


The reason you get the kernel error is that you are not actually checking the error code returned by your clBuildProgram call: its return value is discarded, so ret still holds the result of clCreateProgramWithSource. If you run ret = clBuildProgram(program, 1, &device_id, "", NULL, NULL); instead, you will see that the clBuildProgram call itself reports an error.

clBuildProgram fails because the OpenCL C language does not have a cl_int data type, which means you cannot use it inside a kernel. The cl_* types are for host-side use. Inside a .cl file like yours, you need to use the regular types such as int, float, char, etc.

Assuming the code you provided has all the necessary definitions of the but and rot macros, changing the kernel to use int instead of cl_int should fix your issue.



来源:https://stackoverflow.com/questions/44856649/code-terminates-after-saying-could-not-create-kernel-on-eclipse

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!