Very simple OpenCL kernel failed with CL_INVALID_COMMAND_QUEUE
I would appreciate it if somebody could help. I have a small kernel that always finishes with a CL_INVALID_COMMAND_QUEUE error. I've tried it on different hardware (a GTX 765M and a GTX 980), and the result is the same. Any ideas? Here is the code (host + kernel): //get all platforms (drivers) std::vector<cl::Platform> all_platforms; cl::Platform::get(&all_platforms); if(all_platforms.size()==0){ std::cout<<" No platforms found. Check OpenCL installation!\n"; exit(1); } cl::Platform default_platform=all_platforms[1]; std::cout << "Using platform: "<<default_platform.getInfo<CL_PLATFORM_NAME>()<<" \n"; //get default device of the default platform std::vector<cl::Device> all_devices; default_platform.getDevices(CL_DEVICE_TYPE_ALL, &all_devices); if(all_devices.size()==0){ std::cout<<" No devices found. Check OpenCL installation!\n"; exit(1); } cl::Device default_device=all_devices[0]; std::cout<< "Using device: "<<default_device.getInfo<CL_DEVICE_NAME>()<<" \n"; cl::Context context({default_device}); cl::Program::Sources sources; std::string kernel_code= "__kernel void test(__global float* A,__global float* R) {" "int i = get_global_id(0);" "if(i>=1075021) return;" "if(i<60000) {" "R[i]=0;" "return;" "};" "float vm=0.f;" "for(int j=i-60000;j<=i;++j)" "vm+=A[j];" "R[i]=vm;" "};"; sources.push_back({kernel_code.c_str(),kernel_code.length()}); cl::Program program(context,sources); if(program.build({default_device})!=CL_SUCCESS){ std::cout<<" Error building: "<<program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(default_device)<<"\n"; exit(1); } size_t n=1075021; // create buffers on the device cl::Buffer buffer_A(context,CL_MEM_READ_WRITE,sizeof(float)*n ); cl::Buffer buffer_R(context,CL_MEM_READ_WRITE,sizeof(float)*n ); float *A = new float[n]; float *R = new float[n]; srand (time(NULL)); for(size_t i=0;i<n;++i) A[i]=rand()%10; cl::CommandQueue queue(context,default_device); cl_int ret; ret=queue.enqueueWriteBuffer(buffer_A,CL_TRUE,0,sizeof(float)*n,A); ret=queue.finish(); cl::Kernel 
kernel_test=cl::Kernel(program,"test"); kernel_test.setArg(0,buffer_A); kernel_test.setArg(1,buffer_R); size_t max_work_size=1024; size_t num_work_groups = (n-1) / max_work_size + 1; size_t global_size_padded = num_work_groups * max_work_size; queue.enqueueNDRangeKernel(kernel_test,cl::NullRange,cl::NDRange(global_size_padded),cl::NDRange(max_work_size)); ret=queue.finish(); ret=queue.enqueueReadBuffer(buffer_R,CL_TRUE,0,sizeof(float)*n,R);
I would appreciate it if somebody could help. I have a small kernel that always finishes with a CL_INVALID_COMMAND_QUEUE error. I've tried it on different hardware (a GTX 765M and a GTX 980), and the result is the same.
Any ideas? Here is the code (host + kernel):

//get all platforms (drivers)
std::vector<cl::Platform> all_platforms;
cl::Platform::get(&all_platforms);
if(all_platforms.size()==0){
std::cout<<" No platforms found. Check OpenCL installation!\n";
exit(1);
}
cl::Platform default_platform=all_platforms[1];
std::cout << "Using platform: "<<default_platform.getInfo<CL_PLATFORM_NAME>()<<" \n";

//get default device of the default platform
std::vector<cl:evice> all_devices;
default_platform.getDevices(CL_DEVICE_TYPE_ALL, &all_devices);
if(all_devices.size()==0){
std::cout<<" No devices found. Check OpenCL installation!\n";
exit(1);
}
cl:evice default_device=all_devices[0];
std::cout<< "Using device: "<<default_device.getInfo<CL_DEVICE_NAME>()<<" \n";

cl::Context context({default_device});

cl::Program::Sources sources;

std::string kernel_code=
"__kernel void test(__global float* A,__global float* R) {"
"int i = get_global_id(0);"
"if(i>=1075021) return;"
"if(i<60000) {"
"R[i]=0;"
"return;"
"};"
"float vm=0.f;"
"for(int j=i-60000;j<=i;++j)"
"vm+=A[j];"
"R[i]=vm;"
"};";

sources.push_back({kernel_code.c_str(),kernel_code .length()});

cl::Program program(context,sources);
if(program.build({default_device})!=CL_SUCCESS){
std::cout<<" Error building: "<<program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(defa ult_device)<<"\n";
exit(1);
}

size_t n=1075021;
// create buffers on the device
cl::Buffer buffer_A(context,CL_MEM_READ_WRITE,sizeof(float)*n );
cl::Buffer buffer_R(context,CL_MEM_READ_WRITE,sizeof(float)*n );

float *A = new float[n];
float *R = new float[n];

srand (time(NULL));

for(size_t i=0;i<n;++i)
A[i]=rand()%10;

cl::CommandQueue queue(context,default_device);

cl_int ret;
ret=queue.enqueueWriteBuffer(buffer_A,CL_TRUE,0,si zeof(float)*n,A);
ret=queue.finish();

cl::Kernel kernel_test=cl::Kernel(program,"test");
kernel_test.setArg(0,buffer_A);
kernel_test.setArg(1,buffer_R);

size_t max_work_size=1024;
size_t num_work_groups = (n-1) / max_work_size + 1;
size_t global_size_padded = num_work_groups * max_work_size;

queue.enqueueNDRangeKernel(kernel_test,cl::NullRan ge,cl::NDRange(global_size_padded),cl::NDRange(max _work_size));
ret=queue.finish();

ret=queue.enqueueReadBuffer(buffer_R,CL_TRUE,0,siz eof(float)*n,R);

#1
Posted 07/04/2017 07:49 AM   
Hi, does the error occur only while profiling, or does the app also fail when run without Nsight?
Hi,

Does the error occur only while profiling, or does the app also fail when run without Nsight?

#2
Posted 07/06/2017 02:32 AM   
Scroll To Top

Add Reply