Compile cuda program with Dynamic Parallelism

Hello,
I want to compile the following code on my Jetson TX2 board to test Dynamic Parallelism:

#include <stdio.h>

// Child grid launched from the device; prints the first half of the message.
__global__ void childKernel() {
        printf("Hello ");
}

// Parent grid: launches childKernel from device code (Dynamic Parallelism),
// waits for it to finish, then prints the second half of the message.
__global__ void parentKernel() {
        childKernel<<<1,1>>>();
        // Wait for the child grid so "Hello " is emitted before "World!".
        // NOTE(review): device-side cudaDeviceSynchronize() is deprecated in
        // recent CUDA toolkits -- confirm against the toolkit in use.
        cudaDeviceSynchronize();
        printf("World!\n");
}

// Launches the parent kernel and waits for it on the host.
// Returns 0 on success, 1 if the launch or the execution reported a CUDA error.
int main(int argc, char **argv){

        parentKernel<<<1, 1>>>();

        // A kernel launch is asynchronous: check that the launch itself was
        // accepted, then block until the device is done. Without the host-side
        // synchronization the process can exit before the device printf
        // buffer is flushed, and the output is lost.
        cudaError_t err = cudaGetLastError();
        if (err == cudaSuccess) {
                err = cudaDeviceSynchronize();
        }
        if (err != cudaSuccess) {
                fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
                return 1;
        }

        return 0;
}

I tried to build this code with the following CMakeLists.txt file:

# Build script for the `prova` test executable using the FindCUDA module.
cmake_minimum_required(VERSION 2.8)

# Locate the CUDA toolkit; REQUIRED aborts configuration if it is not found,
# QUIET suppresses the informational messages.
find_package(CUDA QUIET REQUIRED)

# FindCUDA option: build CUDA objects with separable compilation, which adds
# a device-link step for the target.
set(CUDA_SEPARABLE_COMPILATION ON)
# NOTE(review): repeats the find_package() call above with the same arguments.
find_package(CUDA QUIET REQUIRED)
# FindCUDA option: do not forward the host CMAKE_C/CXX_FLAGS to nvcc.
set(CUDA_PROPAGATE_HOST_FLAGS OFF)

# Extra nvcc arguments: generate code for sm_35 and enable relocatable device
# code (-rdc=true).
# NOTE(review): the reply further down this thread states the TX2 is sm_62,
# so sm_35 does not match the target board.
set(
        CUDA_NVCC_FLAGS 
        ${CUDA_NVCC_FLAGS}; 
                -gencode arch=compute_35,code=sm_35 -rdc=true
)

# Build the executable `prova` from test.cu through nvcc.
cuda_add_executable(
        prova 
        test.cu
)

but the build fails with this error:

nvlink error   : Undefined reference to 'cudaGetParameterBufferV2' in '/home/nvidia/Documents/cmake_tests/CMakeFiles/prova.dir//./prova_generated_test.cu.o'
nvlink error   : Undefined reference to 'cudaLaunchDeviceV2' in '/home/nvidia/Documents/cmake_tests/CMakeFiles/prova.dir//./prova_generated_test.cu.o'
nvlink error   : Undefined reference to 'cudaDeviceSynchronize' in '/home/nvidia/Documents/cmake_tests/CMakeFiles/prova.dir//./prova_generated_test.cu.o'
CMakeFiles/prova.dir/build.make:68: recipe for target 'CMakeFiles/prova.dir/prova_intermediate_link.o' failed
make[2]: *** [CMakeFiles/prova.dir/prova_intermediate_link.o] Error 255
CMakeFiles/Makefile2:67: recipe for target 'CMakeFiles/prova.dir/all' failed
make[1]: *** [CMakeFiles/prova.dir/all] Error 2
Makefile:83: recipe for target 'all' failed
make: *** [all] Error 2

The problem occurs only when I try to compile with the options -gencode arch=compute_35,code=sm_35 -rdc=true. If I compile with compute_20 capability it works, and it also works if I compile directly with NVCC.

Hi,

The TX2 has an sm_62 GPU architecture.
Please remember to use this architecture setting when compiling.

Thanks.

Thanks for the answer. I have tried to set compute capability 6.2 in CMake:

# Build script for the `prova` test executable using the FindCUDA module.
cmake_minimum_required(VERSION 2.8)

# Locate the CUDA toolkit; REQUIRED aborts configuration if it is not found,
# QUIET suppresses the informational messages.
find_package(CUDA QUIET REQUIRED)

# FindCUDA option: build CUDA objects with separable compilation, which adds
# a device-link step for the target.
set(CUDA_SEPARABLE_COMPILATION ON)
# NOTE(review): repeats the find_package() call above with the same arguments.
find_package(CUDA QUIET REQUIRED)
# FindCUDA option: do not forward the host CMAKE_C/CXX_FLAGS to nvcc.
set(CUDA_PROPAGATE_HOST_FLAGS OFF)

# Extra nvcc arguments: target compute capability 6.2, enable relocatable
# device code, and name the CUDA device runtime library (cudadevrt).
# NOTE(review): these flags are handed to nvcc only; presumably they do not
# reach the final host link of `prova`, which is the step that fails in the
# log below -- confirm.
set(
        CUDA_NVCC_FLAGS 
        ${CUDA_NVCC_FLAGS}; 
        -arch=compute_62 -rdc=true -lcudadevrt
)

# Build the executable `prova` from test.cu through nvcc.
cuda_add_executable(
        prova 
        test.cu
)

then I have compiled with:

cmake .
make

It compiles perfectly but fails during the linking phase with the following errors:

CMakeFiles/prova.dir/prova_intermediate_link.o: In function `__cudaRegisterLinkedBinary_66_tmpxft_00001101_00000000_13_cuda_device_runtime_compute_62_cpp1_ii_8b1a5d37':
link.stub:(.text+0xcc): undefined reference to `__fatbinwrap_66_tmpxft_00001101_00000000_13_cuda_device_runtime_compute_62_cpp1_ii_8b1a5d37'
link.stub:(.text+0xd0): undefined reference to `__fatbinwrap_66_tmpxft_00001101_00000000_13_cuda_device_runtime_compute_62_cpp1_ii_8b1a5d37'
collect2: error: ld returned 1 exit status
CMakeFiles/prova.dir/build.make:200: recipe for target 'prova' failed
make[2]: *** [prova] Error 1
CMakeFiles/Makefile2:67: recipe for target 'CMakeFiles/prova.dir/all' failed
make[1]: *** [CMakeFiles/prova.dir/all] Error 2
Makefile:83: recipe for target 'all' failed
make: *** [all] Error 2

Hi,

The cause is a missing definition at link time: the CUDA device runtime library (cudadevrt) is not linked into the executable.
Try this:

# Build script for `prova`, a CUDA Dynamic Parallelism test for the
# Jetson TX2 (sm_62), using the FindCUDA module.
cmake_minimum_required(VERSION 2.8)
project(prova)

# Locate the CUDA toolkit once. The FindCUDA settings below are set *after*
# this call on purpose: FindCUDA declares them as cache options, and a plain
# variable set afterwards reliably overrides the cached default.
find_package(CUDA QUIET REQUIRED)

# Separable compilation (relocatable device code + device-link step) is
# required for kernels that launch other kernels.
set(CUDA_SEPARABLE_COMPILATION ON)
# Do not forward the host CMAKE_C/CXX_FLAGS to nvcc.
set(CUDA_PROPAGATE_HOST_FLAGS OFF)

# Generate code for the TX2 GPU architecture.
list(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_62,code=sm_62)

# Use the include path FindCUDA discovered instead of a hard-coded
# /usr/local/cuda-<version> directory, so the script survives toolkit upgrades.
include_directories(${CUDA_INCLUDE_DIRS})

# The device runtime library provides cudaLaunchDeviceV2 & co. for
# device-side kernel launches. Newer FindCUDA versions already locate it;
# older ones do not define the variable, so search under the toolkit root.
if(NOT CUDA_cudadevrt_LIBRARY)
        find_library(
                CUDA_cudadevrt_LIBRARY
                NAMES cudadevrt
                HINTS "${CUDA_TOOLKIT_ROOT_DIR}"
                PATH_SUFFIXES lib64 lib
        )
endif()
if(NOT CUDA_cudadevrt_LIBRARY)
        message(FATAL_ERROR "cudadevrt library not found under ${CUDA_TOOLKIT_ROOT_DIR}")
endif()

cuda_add_executable(
        prova
        test.cu
)

# Link the device runtime into the final executable by full path (no
# link_directories / -l flag needed). The plain signature (no
# PRIVATE/PUBLIC keyword) is kept deliberately: older FindCUDA versions use
# the plain signature inside cuda_add_executable(), and CMake forbids mixing
# the two forms on one target.
target_link_libraries(
        prova
        ${CUDA_cudadevrt_LIBRARY}
)

Thanks.