CL_INVALID_COMMAND_QUEUE when calling clFinish

Hello All,

I am pretty new to GPU computing with OpenCL. Recently I got error code -36 (CL_INVALID_COMMAND_QUEUE) when calling clFinish.
My C++ code is really long, so I just put some major functions below.

Any comment will be appreciated!

// Step 01: Ask the runtime for one platform ID (the number of available platforms is returned as well)
	OpenCLobj.err = clGetPlatformIDs(
		1,
		&OpenCLobj.platform_id,
		&OpenCLobj.ret_num_platforms );
	err_check( OpenCLobj.err, "clGetPlatformIDs" );

// Step 02: Ask that platform for one device of its default type
	OpenCLobj.err = clGetDeviceIDs(
		OpenCLobj.platform_id,
		CL_DEVICE_TYPE_DEFAULT,
		1,
		&OpenCLobj.device_id,
		&OpenCLobj.ret_num_devices );
	err_check( OpenCLobj.err, "clGetDeviceIDs" );

// Step 03: Create an OpenCL context holding just that device (no properties, no error callback)
	OpenCLobj.context = clCreateContext(
		NULL,
		1,
		&OpenCLobj.device_id,
		NULL,
		NULL,
		&OpenCLobj.err );
	err_check( OpenCLobj.err, "clCreateContext" );

// Step 04: Create the command queue for the device with no queue properties set
	OpenCLobj.command_queue = clCreateCommandQueue(
		OpenCLobj.context,
		OpenCLobj.device_id,
		0,
		&OpenCLobj.err );
	err_check( OpenCLobj.err, "clCreateCommandQueue" );

// Step 06: Read kernel file
	ifstream file("kernel.cl");
	string prog( istreambuf_iterator<char>( file ), ( istreambuf_iterator<char>() ) );
	const char *source_str = prog.c_str();

// Step 07: Create the program object from the source text read in above
//   A NULL lengths array tells OpenCL that the source string is null-terminated.
	OpenCLobj.program = clCreateProgramWithSource( OpenCLobj.context, 1, &source_str, NULL, &OpenCLobj.err );
	err_check( OpenCLobj.err, "clCreateProgramWithSource" );

// Step 08: Build the program for the selected device; on a build failure,
//          print the compiler's build log before the error check runs
	OpenCLobj.err = clBuildProgram( OpenCLobj.program, 1, &OpenCLobj.device_id, NULL, NULL, NULL );
	if ( OpenCLobj.err == CL_BUILD_PROGRAM_FAILURE ) {
		cout << "CL_BUILD_PROGRAM_FAILURE" ;

		// Query the log length first: a fixed-size buffer makes the query fail
		// for longer logs, which would leave nothing valid to print.
		size_t build_log_size = 0;
		cl_int log_err = clGetProgramBuildInfo( OpenCLobj.program, OpenCLobj.device_id,
		                                        CL_PROGRAM_BUILD_LOG, 0, NULL, &build_log_size );
		if ( log_err == CL_SUCCESS && build_log_size > 0 ) {
			// std::string owns the storage, so nothing is leaked on any path.
			string build_log( build_log_size, '\0' );
			log_err = clGetProgramBuildInfo( OpenCLobj.program, OpenCLobj.device_id,
			                                 CL_PROGRAM_BUILD_LOG, build_log_size, &build_log[0], NULL );
			if ( log_err == CL_SUCCESS ) {
				cout << build_log.c_str();
			}
		}
	}
	err_check( OpenCLobj.err, "clBuildProgram" );

// Step 09: Create OpenCL Kernel
//   Looks up the kernel function "padding_center" in the built program.
		cl_kernel kernel = clCreateKernel( OpenCLobj.program, "padding_center", &OpenCLobj.err );
		err_check3( OpenCLobj.err, "clCreateKernel" );


// Step 05: Create memory objects and transfer the data to the memory buffer
//   The buffer holds AD.Xdim * AD.Ydim floats; the write is blocking (CL_TRUE).
		cl_mem mobj_diffmatrix = clCreateBuffer(
			OpenCLobj.context,
			CL_MEM_READ_WRITE,
			sizeof(float)*AD.Xdim*AD.Ydim,
			NULL,
			&OpenCLobj.err );
		err_check3( OpenCLobj.err, "clCreateBuffer" );

		OpenCLobj.err = clEnqueueWriteBuffer(
			OpenCLobj.command_queue,
			mobj_diffmatrix,
			CL_TRUE,
			0,
			sizeof(float)*AD.Xdim*AD.Ydim,
			diffmatrix,
			0, NULL, NULL );
		err_check3( OpenCLobj.err, "clEnqueueWriteBuffer" );

// Step 10: Set OpenCL kernel argument
//   Argument 0 is the buffer created above.
//   NOTE(review): mobj_tmpdiff (argument 1) is not declared in this fragment;
//   presumably it is created elsewhere -- confirm it is a valid cl_mem here.
		OpenCLobj.err = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &mobj_diffmatrix );
		err_check3( OpenCLobj.err, "clSetKernelArg" );
		OpenCLobj.err = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &mobj_tmpdiff );
		err_check3( OpenCLobj.err, "clSetKernelArg" );
		
// Step 11: Execute OpenCL kernel in data parallel
//   Enqueues the kernel AD.nIter times as a 1-D range of `work` items in
//   work-groups of `localwork` items, then flushes and waits for completion.
//   NOTE(review): localwork must not exceed the work-group size limit of the
//   device/kernel; now that the enqueue return code is checked, a violation is
//   reported at the enqueue call instead of surfacing later.
		size_t work = 1000;
		size_t localwork = 1000;
		cl_event clEvent = NULL;
		for (int i = 0; i < AD.nIter; i++)
		{
			// Each enqueue returns a new event object; release the previous one
			// so only the most recent event stays referenced.
			if ( clEvent != NULL ) {
				clReleaseEvent( clEvent );
				clEvent = NULL;
			}

			// Store the return code: the check below would otherwise test a
			// stale value from an earlier call and miss enqueue failures.
			OpenCLobj.err = clEnqueueNDRangeKernel( OpenCLobj.command_queue, kernel, 1, NULL, &work, &localwork, 0, NULL, &clEvent );
			err_check3( OpenCLobj.err, "clEnqueueNDRangeKernel" );
		}

		OpenCLobj.err = clFlush( OpenCLobj.command_queue );		err_check3( OpenCLobj.err, "clFlush" );
		OpenCLobj.err = clFinish( OpenCLobj.command_queue );	err_check3( OpenCLobj.err, "clFinish" );

I am missing a description of the problem you are seeing. What is the actual behavior, and what is the expected behavior?

Thank you for the reply!
In the attached code, at line 56, the program calls clFinish to finish the command_queue, but the call returns error code -36, which is CL_INVALID_COMMAND_QUEUE.

This may be of interest:

[url]https://devtalk.nvidia.com/default/topic/501409/cl_invalid_command_queue-error-on-clfinish-command-a-lot-of-operations-in-each-kernel-driver-crash/?offset=2[/url]

(i.e. you may be getting a driver reset due to exceeding a watchdog timeout)

Thanks a lot! But I was running the program on Linux, not Windows. If Linux has the same issue, is there any suggestion to solve it?

Are you using X (i.e., are you using a GUI)?

if so, see here:

[url]USING CUDA AND X | NVIDIA[/url]

Actually not, I don't have X Windows. The program was run from the console.