Hi, all
When compiling .cu with nvcc, i can use both cg::thread_block and cg::grid_group, however,
when i using cooperatives with nvrtc i can only use cg::thread_block and can not use
cg::grid_group, it has
error: cuModuleLoadDataEx(&module, ptx, jitNumOptions, jitOptions, jitOptVals) failed with error CUDA_ERROR_INVALID_PTX
I don’t known how to solve this, I would be grateful for any advice!
And the code i used
const char *saxpy = " \n\
#include <cooperative_groups.h> \n\
__device__ void sync() { \n\
cooperative_groups::thread_block cta = cooperative_groups::this_thread_block(); \n\
cooperative_groups::grid_group grid = cooperative_groups::this_grid(); \n\
}\n\
extern \"C\" __global__ \n\
void saxpy(float a, float *x, float *y, float *out, size_t n) \n\
{ \n\
sync(); \n\
size_t tid = blockIdx.x * blockDim.x + threadIdx.x; \n\
if (tid < n) { \n\
out[tid] = a * x[tid] + y[tid]; \n\
} \n\
}";
int main(int argc, char** argv)
{
// Create an instance of nvrtcProgram with the SAXPY code string.
nvrtcProgram prog;
NVRTC_SAFE_CALL(
nvrtcCreateProgram(&prog, // prog
saxpy, // buffer
"saxpy.cu", // name
0, // numHeaders
NULL, // headers
NULL)); // includeNames
const char *opts[] = {"--include-path=/usr/local/cuda-9.0/include/", "-rdc=true", "-arch=compute_61"};
nvrtcResult compileResult = nvrtcCompileProgram(prog, // prog
3, // numOptions
opts); // options
size_t logSize;
NVRTC_SAFE_CALL(nvrtcGetProgramLogSize(prog, &logSize));
char *log = new char[logSize];
NVRTC_SAFE_CALL(nvrtcGetProgramLog(prog, log));
std::cout << log << '\n';
delete[] log;
if (compileResult != NVRTC_SUCCESS) {
exit(1);
}
// Obtain PTX from the program.
size_t ptxSize;
NVRTC_SAFE_CALL(nvrtcGetPTXSize(prog, &ptxSize));
char *ptx = new char[ptxSize];
NVRTC_SAFE_CALL(nvrtcGetPTX(prog, ptx));
// Destroy the program.
NVRTC_SAFE_CALL(nvrtcDestroyProgram(&prog));
// Load the generated PTX and get a handle to the SAXPY kernel.
CUdevice cuDevice;
CUcontext context;
CUmodule module;
CUfunction kernel;
CUDA_SAFE_CALL(cuInit(0));
CUDA_SAFE_CALL(cuDeviceGet(&cuDevice, 0));
CUDA_SAFE_CALL(cuCtxCreate(&context, 0, cuDevice));
const unsigned int jitNumOptions = 3;
CUjit_option *jitOptions = new CUjit_option[jitNumOptions];
void **jitOptVals = new void *[jitNumOptions];
int jitTarget = 61;
jitOptions[0] = CU_JIT_TARGET;
jitOptVals[0] = (void *)(size_t)jitTarget;
jitOptions[1] = CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES;
int jitLogBufferSize = 1 << 20;
jitOptVals[1] = (void *)(size_t)jitLogBufferSize;
itOptions[2] = CU_JIT_INFO_LOG_BUFFER;
char *jitLogBuffer = new char[jitLogBufferSize];
jitOptVals[2] = jitLogBuffer;
std::cout << jitLogBuffer << std::endl;
printf("> PTX JIT log:\n%s\n", jitLogBuffer);
// set up size of compilation log buffer
CUDA_SAFE_CALL(cuModuleLoadDataEx(&module, ptx, jitNumOptions, jitOptions, jitOptVals));