问题
I am trying to translate a sequential C code for a MJPEG decoder into OpenCL. I got the C code from this github project.
I am now trying to convert the original C code for IDCT into OpenCL.
I copied the code from the `.c` file for the IDCT and pasted it into my `.cl` file, which I named `invCosine.cl`.
`invCosine.cl`:
/*
 * Limits of the 32-bit signed integer range. IDCT_INT_MIN is written in
 * terms of IDCT_INT_MAX so that no out-of-range literal is needed.
 */
#define IDCT_INT_MAX 2147483647
#define IDCT_INT_MIN (-IDCT_INT_MAX - 1)

/*
 * Useful constants.
 *
 * ck = cos(k*pi/16) = s(8-k) = sin((8-k)*pi/16), multiplied by
 * (1 << C_BITS) and rounded.
 *
 * NOTE(review): the *_s2 variants look like the same cosine further scaled
 * by sqrt(2) -- confirm against the original idct.c.
 */
#define c0_1  16384
#define c0_s2 23170
#define c1_1  16069
#define c1_s2 22725
....
....
....
....
....
....
/*
 * idct1D: four-stage transform applied in place to the eight values
 * Y[0]..Y[7]. Stage 1 reads all eight inputs, stages 2 and 3 work on the
 * private temporaries z1/z2/z3, and stage 4 writes all eight outputs back
 * into Y.
 *
 * but(), rot(), CMUL() and sqrt2 are not defined in the visible part of this
 * file; they are presumably among the elided definitions above.
 *
 * NOTE(review): cl_int is a host-side OpenCL API typedef, not an OpenCL C
 * type. Device code must use int; as written the program does not build,
 * so clCreateKernel cannot find "idct1D".
 *
 * NOTE(review): no work-item index (e.g. get_global_id) is used, so every
 * work-item launched would operate on the same eight elements of Y.
 */
__kernel void idct1D(__global cl_int *Y) {
/* Per-stage scratch values; cl_int should be int here (see note above). */
cl_int z1[8], z2[8], z3[8];
/*
* Stage 1: combine the inputs pairwise into z1. Y[3] and Y[5] are only
* scaled by sqrt2 via CMUL.
*/
but(Y[0], Y[4], z1[1], z1[0]);
rot(1, 6, Y[2], Y[6], &z1[2], &z1[3]);
but(Y[1], Y[7], z1[4], z1[7]);
z1[5] = CMUL(sqrt2, Y[3]);
z1[6] = CMUL(sqrt2, Y[5]);
/*
* Stage 2: four but() steps from z1 into z2.
*/
but(z1[0], z1[3], z2[3], z2[0]);
but(z1[1], z1[2], z2[2], z2[1]);
but(z1[4], z1[6], z2[6], z2[4]);
but(z1[7], z1[5], z2[5], z2[7]);
/*
* Stage 3: elements 0..3 are copied unchanged; elements 4..7 go through
* two rot() steps.
*/
z3[0] = z2[0];
z3[1] = z2[1];
z3[2] = z2[2];
z3[3] = z2[3];
rot(0, 3, z2[4], z2[7], &z3[4], &z3[7]);
rot(0, 1, z2[5], z2[6], &z3[5], &z3[6]);
/*
* Final stage 4: but() steps pair z3[i] with z3[7-i] and store the results
* back into Y.
*/
but(z3[0], z3[7], Y[7], Y[0]);
but(z3[1], z3[6], Y[6], Y[1]);
but(z3[2], z3[5], Y[5], Y[2]);
but(z3[3], z3[4], Y[4], Y[3]);
}
I essentially copied and pasted the `#define` statements from the original file, `idct.c`. I also copied the code for the kernel from the same `.c` file and just added the keywords `__kernel`, `__global` and `cl_int` to it. At the moment, I do not want to write optimized OpenCL code; I simply want to offload the IDCT computations to the GPU.
My host code in the main file is as follows:
//////////////OpenCL parameters/////////////////////////////
/*
 * Host-side OpenCL setup: load the kernel source from disk, pick a platform
 * and GPU device, create a context and command queue, build the program and
 * create the "idct1D" kernel.
 *
 * NOTE(review): out, result and storeResult are not used in this excerpt.
 */
int out;
cl_platform_id platform_id;
cl_uint ret_num_platforms;
cl_device_id device_id;
cl_uint ret_num_devices;
cl_context context;
cl_command_queue command_queue;
cl_program program;
//size_t kernel_code_size;
int *result;
cl_int ret;
cl_kernel kernel;
int storeResult;
FILE *fp;
const char fileName[] = "/root/Downloads/tima_seq_version/src/invCosine.cl";
size_t source_size;
char *source_str;
/* Load kernel source file */
fp = fopen(fileName, "rb");
if (!fp) {
fprintf(stderr, "Failed to load kernel.\n");
exit(1);
}
/* MAX_SOURCE_SIZE is defined outside this excerpt.
 * NOTE(review): the malloc result is not checked before fread uses it. */
source_str = (char *)malloc(MAX_SOURCE_SIZE);
source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp);
fclose(fp);
/////////Set platform, context, command-queue.........../////////////////////////
/* Get Platform */
/* Only the platform count is inspected; ret itself is not checked. */
ret= clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
if (ret_num_platforms == 0)
{
printf("Found 0 platforms!\n");
return EXIT_FAILURE;
}
/* Get Device */
/* Requests a single GPU device; ret is not checked here either. */
ret= clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, &ret_num_devices);
printf("Number of devices: %d\n", ret_num_devices);
/* Create Context */
context = clCreateContext(0, 1, &device_id, NULL, NULL, &ret);
if (!context)
{
printf("NO cCONTEXT\n");
return EXIT_FAILURE;
}
/* Create Command Queue */
command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
if (!command_queue)
{
printf("NO command queue\n");
return EXIT_FAILURE;
}
/* Create kernel from source */
program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret);
if (!program)
{
printf("NO PROGRAM!!!!\n");
return EXIT_FAILURE;
}
/*
 * NOTE(review): the return value of clBuildProgram is discarded, so the
 * `ret` tested below still holds the status written by
 * clCreateProgramWithSource. A build failure therefore goes unreported here
 * and the build log is never printed; the failure only surfaces later at
 * clCreateKernel. Assigning the result to `ret` would expose it.
 */
clBuildProgram(program, 1, &device_id, "", NULL, NULL);
if (ret != CL_SUCCESS) {
printf("building program failed\n");
if (ret == CL_BUILD_PROGRAM_FAILURE) {
/* First call queries the log size, second call fetches the log text. */
size_t log_size;
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
/* NOTE(review): log is not checked for NULL and is not freed in this excerpt. */
char *log = (char *) malloc(log_size);
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
printf("%s\n", log);
}
}
/* Look up the kernel by name in the program; exit if creation fails. */
kernel= clCreateKernel(program, "idct1D", &ret);
if( !kernel || ret != CL_SUCCESS)
{
printf("-----COULD NOT CREATE KERNEL!!---\n");
exit(1);
}
When I run the application, I get the following output:
Number of devices: 2
-----COULD NOT CREATE KERNEL!!---
In order to understand why the kernel is not being detected, I removed all my idct code and copied and pasted the kernel code from this link into my .cl file:
/*
 * Adds four elements of A and B, taken at offsets 0, 4, 8 and 12, and stores
 * each sum at the same offset in C. No work-item index is used, so the same
 * four elements are addressed regardless of which work-item runs this.
 */
__kernel void taskParallelAdd(__global float* A, __global float* B, __global float* C)
{
    const int base = 0;

    /* Walk the four stride-4 slots in ascending order. */
    for (int offset = 0; offset <= 12; offset += 4) {
        const int idx = base + offset;
        C[idx] = A[idx] + B[idx];
    }
}
When I ran the application again, it ran smoothly and did not produce the same error.
Why is my idct kernel not being detected?
P.S. I am using the Eclipse IDE for writing and running my code.
回答1:
The reason you are getting the kernel error is that you are not actually checking the error code returned by your `clBuildProgram` call. If you run `ret = clBuildProgram(program, 1, &device_id, "", NULL, NULL);` instead, you will see that the `clBuildProgram` call itself fails.
The reason you are getting that error is that the OpenCL C language does not have a `cl_int` data type, which means you cannot use it inside a kernel. The `cl_*` types are for host-side usage. Inside a `.cl` file like yours, you need to use the regular types such as `int`, `float`, `char`, etc.
Assuming the code you provided has all the necessary definitions for the `but` and `rot` macros, changing the kernel to use `int` instead of `cl_int` should fix your issue.
来源:https://stackoverflow.com/questions/44856649/code-terminates-after-saying-could-not-create-kernel-on-eclipse