How to call CUDA code from C++

I have my source file ImageFeatures.cpp. From this file I want to call a cuda file. But I cannot get it to work.

I believe it is because of the include statement #include "knn_cuda_without_indexes.cu". The compiler does not understand this because it is inside a .cpp file, but I am not sure.

I hope you know what it is.

ImageFeatures.cpp

#include <iostream>
#include <vector>
#include "opencv2/core/core.hpp"
#include "opencv2/features2d/features2d.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/gpu/gpu.hpp"
#include <cuda.h>
#include "knn_cuda_without_indexes.cu"

using namespace std;

using namespace cv;

using namespace cv::gpu;

void knn(float* ref_host, int ref_width, float* query_host, int query_width, int height, int k, float* dist_host);

/**
 * Reads the image at `path` as grayscale, wraps it in a GpuMat and runs
 * SURF_GPU on it, then downloads the resulting keypoints and descriptors into
 * host-side vectors.
 *
 * The downloaded results are local to this function and are not returned;
 * the only visible output is the line "Test" written to standard output.
 *
 * @param path  file path handed to imread()
 */
void computeSURFPoints(string path)
{
	// Load on the host first, then build the device-side matrix from it.
	Mat hostImage = imread(path, CV_LOAD_IMAGE_GRAYSCALE);
	GpuMat deviceImage(hostImage);

	SURF_GPU detector;

	// Detect keypoints and compute descriptors; an empty GpuMat is passed as
	// the second argument, exactly as the original call did.
	GpuMat deviceKeypoints;
	GpuMat deviceDescriptors;
	GpuMat emptyMask;
	detector(deviceImage, emptyMask, deviceKeypoints, deviceDescriptors);

	// Copy the results back into host containers.
	vector<KeyPoint> hostKeypoints;
	detector.downloadKeypoints(deviceKeypoints, hostKeypoints);

	vector<float> hostDescriptors;
	detector.downloadDescriptors(deviceDescriptors, hostDescriptors);

	cout << "Test" << endl;
}

/**
 * Calls knn() with host buffers owned by this function.
 *
 * The earlier version declared `ref`, `query` and `dist` as raw pointers and
 * passed them to knn() without ever pointing them at storage, which is
 * undefined behaviour (the compiler flags it as "used before its value is
 * set"). The three arrays are now backed by zero-initialised std::vector
 * storage that is released automatically when the function returns.
 *
 * The reference and query buffers are still only zero-filled placeholders;
 * real point data has to be written into them before the result means
 * anything.
 *
 * @param k  number of neighbours, forwarded unchanged to knn()
 */
void computeKnn(int k)
{
	const int ref_nb   = 4096;   // Reference point number, max=65535
	const int query_nb = 4096;   // Query point number,     max=65535
	const int dim      = 32;     // Dimension of points

	// NOTE(review): sizes assume knn() reads ref_width*height and
	// query_width*height floats and writes at most query_width*k floats into
	// the distance array — confirm against knn_cuda_without_indexes.cu.
	// The distance buffer is never smaller than query_nb, even for k <= 0.
	const std::size_t dist_per_query = static_cast<std::size_t>(k > 0 ? k : 1);

	std::vector<float> ref(static_cast<std::size_t>(ref_nb) * dim);       // reference point array
	std::vector<float> query(static_cast<std::size_t>(query_nb) * dim);   // query point array
	std::vector<float> dist(static_cast<std::size_t>(query_nb) * dist_per_query);   // distance array

	// &v[0] rather than v.data() so the code also builds with pre-C++11 compilers.
	knn(&ref[0], ref_nb, &query[0], query_nb, dim, k, &dist[0]);
}

/**
 * Program entry point. Currently a stub: it does no work and exits with
 * status 0.
 */
int main(int argc, char* argv[])
{
	// The command-line arguments are accepted but not used.
	(void)argc;
	(void)argv;

	return 0;
}

The file knn_cuda_without_indexes.cu is taken from here.

I have tried compiling and linking with:

g++ ImageFeatures.cpp `pkg-config --cflags --libs opencv` -lopencv_gpu

nvcc ImageFeatures.cpp `pkg-config --cflags --libs opencv` -lopencv_gpu

but I get the errors:

ImageFeatures.cpp:7:18: error: cuda.h: No such file or directory

In file included from ImageFeatures.cpp:8:

knn_cuda_without_indexes.cu:54: error: expected constructor, destructor, or type conversion before ‘<’ token

knn_cuda_without_indexes.cu:74: error: expected constructor, destructor, or type conversion before ‘void’

You probably need to add /usr/local/cuda/include to your compile line with -I.

If you mean like this:

g++ ImageFeatures.cpp `pkg-config --cflags --libs opencv` -lopencv_gpu -I/usr/local/cuda/include/
Then nope. I get the same errors.

If I make a Main.cu file without any c++ code then there is no problem with knn_cuda_without_indexes.cu.

If you are using CUDA C extensions, the file needs to be compiled by nvcc.
Since you are pulling in the CUDA code with an #include (a bad idea, BTW), your .cpp file is really a .cu file.

What do you suggest?

I have tried removing the two include statements and run these commands:

nvcc -o sample ImageFeatures.o  knn_cuda_without_indexes.o

nvcc knn_cuda_without_indexes.cu -lcuda -D_CRT_SECURE_NO_DEPRECATE -c

nvcc -o sample ImageFeatures.o  knn_cuda_without_indexes.o `pkg-config --cflags --libs opencv` -lopencv_gpu

But that didn’t work either.

I am new to C/C++, so I am not sure about compiling and linking.

These two steps:
nvcc -c knn_cuda_without_indexes.cu
g++ -o sample ImageFeatures.cpp knn_cuda_without_indexes.o `pkg-config --cflags --libs opencv` -lopencv_gpu -I/usr/local/cuda/include -L/usr/local/cuda/lib64 -lcudart

should produce an executable, assuming all the CUDA C code is in knn_cuda_without_indexes.cu.

Even simpler, if ImageFeatures.cpp is renamed to ImageFeatures.cu:

nvcc -o sample ImageFeatures.cu knn_cuda_without_indexes.cu `pkg-config --cflags --libs opencv` -lopencv_gpu

I am not sure whether I should run this with the two include statements or without them. Here are the results:

With:

In file included from ImageFeatures.cpp:8:

knn_cuda_without_indexes.cu:54: error: expected constructor, destructor, or type conversion before ‘<’ token

knn_cuda_without_indexes.cu:74: error: expected constructor, destructor, or type conversion before ‘void’

Without:

ld: warning: directory not found for option '-L/usr/local/cuda/lib64'

ld: library not found for -lcudart

collect2: ld returned 1 exit status

I have checked, and I don’t have a lib64 directory. I am running on a Mac.

/usr/local/Cellar/opencv/2.3.1a/include/opencv2/core/mat.hpp(381): warning: integer conversion resulted in a change of sign

/usr/local/Cellar/opencv/2.3.1a/include/opencv2/core/mat.hpp(2096): warning: integer conversion resulted in a change of sign

ImageFeatures.cu(42): warning: variable "ref" is used before its value is set

ImageFeatures.cu(42): warning: variable "query" is used before its value is set

ImageFeatures.cu(42): warning: variable "dist" is used before its value is set

/usr/local/Cellar/opencv/2.3.1a/include/opencv2/core/operations.hpp(2369): warning: integer conversion resulted in a change of sign

          detected during instantiation of "cv::Ptr<_Tp>::~Ptr() [with _Tp=cv::gpu::FilterEngine_GPU]" 

/usr/local/Cellar/opencv/2.3.1a/include/opencv2/gpu/gpu.hpp(772): here

/usr/local/Cellar/opencv/2.3.1a/include/opencv2/core/operations.hpp(2369): warning: integer conversion resulted in a change of sign

/usr/local/Cellar/opencv/2.3.1a/include/opencv2/core/mat.hpp(381): warning: integer conversion resulted in a change of sign

/usr/local/Cellar/opencv/2.3.1a/include/opencv2/core/mat.hpp(2096): warning: integer conversion resulted in a change of sign

ImageFeatures.cu(42): warning: variable "ref" is used before its value is set

ImageFeatures.cu(42): warning: variable "query" is used before its value is set

ImageFeatures.cu(42): warning: variable "dist" is used before its value is set

ld: warning: ignoring file /usr/local/Cellar/opencv/2.3.1a/lib/libopencv_core.dylib, file was built for unsupported file format which is not the architecture being linked (i386)

ld: warning: ignoring file /usr/local/Cellar/opencv/2.3.1a/lib/libopencv_imgproc.dylib, file was built for unsupported file format which is not the architecture being linked (i386)

ld: warning: ignoring file /usr/local/Cellar/opencv/2.3.1a/lib/libopencv_highgui.dylib, file was built for unsupported file format which is not the architecture being linked (i386)

ld: warning: ignoring file /usr/local/Cellar/opencv/2.3.1a/lib/libopencv_ml.dylib, file was built for unsupported file format which is not the architecture being linked (i386)

ld: warning: ignoring file /usr/local/Cellar/opencv/2.3.1a/lib/libopencv_video.dylib, file was built for unsupported file format which is not the architecture being linked (i386)

ld: warning: ignoring file /usr/local/Cellar/opencv/2.3.1a/lib/libopencv_features2d.dylib, file was built for unsupported file format which is not the architecture being linked (i386)

ld: warning: ignoring file /usr/local/Cellar/opencv/2.3.1a/lib/libopencv_calib3d.dylib, file was built for unsupported file format which is not the architecture being linked (i386)

ld: warning: ignoring file /usr/local/Cellar/opencv/2.3.1a/lib/libopencv_objdetect.dylib, file was built for unsupported file format which is not the architecture being linked (i386)

ld: warning: ignoring file /usr/local/Cellar/opencv/2.3.1a/lib/libopencv_contrib.dylib, file was built for unsupported file format which is not the architecture being linked (i386)

ld: warning: ignoring file /usr/local/Cellar/opencv/2.3.1a/lib/libopencv_legacy.dylib, file was built for unsupported file format which is not the architecture being linked (i386)

ld: warning: ignoring file /usr/local/Cellar/opencv/2.3.1a/lib/libopencv_flann.dylib, file was built for unsupported file format which is not the architecture being linked (i386)

ld: warning: ignoring file /usr/local/Cellar/opencv/2.3.1a/lib/libopencv_gpu.dylib, file was built for unsupported file format which is not the architecture being linked (i386)

Undefined symbols for architecture i386:

  "cv::imread(std::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, int)", referenced from:

      computeSURFPoints(std::basic_string<char, std::char_traits<char>, std::allocator<char> >)in tmpxft_0000207c_00000000-16_ImageFeatures.o

  "cv::gpu::GpuMat::GpuMat(cv::Mat const&)", referenced from:

      computeSURFPoints(std::basic_string<char, std::char_traits<char>, std::allocator<char> >)in tmpxft_0000207c_00000000-16_ImageFeatures.o

  "cv::gpu::SURF_GPU::SURF_GPU()", referenced from:

      computeSURFPoints(std::basic_string<char, std::char_traits<char>, std::allocator<char> >)in tmpxft_0000207c_00000000-16_ImageFeatures.o

  "cv::gpu::SURF_GPU::operator()(cv::gpu::GpuMat const&, cv::gpu::GpuMat const&, cv::gpu::GpuMat&, cv::gpu::GpuMat&, bool)", referenced from:

      computeSURFPoints(std::basic_string<char, std::char_traits<char>, std::allocator<char> >)in tmpxft_0000207c_00000000-16_ImageFeatures.o

  "cv::gpu::SURF_GPU::downloadKeypoints(cv::gpu::GpuMat const&, std::vector<cv::KeyPoint, std::allocator<cv::KeyPoint> >&)", referenced from:

      computeSURFPoints(std::basic_string<char, std::char_traits<char>, std::allocator<char> >)in tmpxft_0000207c_00000000-16_ImageFeatures.o

  "cv::gpu::SURF_GPU::downloadDescriptors(cv::gpu::GpuMat const&, std::vector<float, std::allocator<float> >&)", referenced from:

      computeSURFPoints(std::basic_string<char, std::char_traits<char>, std::allocator<char> >)in tmpxft_0000207c_00000000-16_ImageFeatures.o

  "cv::Mat::deallocate()", referenced from:

      cv::Mat::release()    in tmpxft_0000207c_00000000-16_ImageFeatures.o

  "cv::fastFree(void*)", referenced from:

      cv::Mat::~Mat() in tmpxft_0000207c_00000000-16_ImageFeatures.o

  "cv::gpu::GpuMat::release()", referenced from:

      cv::gpu::GpuMat::~GpuMat()in tmpxft_0000207c_00000000-16_ImageFeatures.o

  "_cuInit", referenced from:

      knn(float*, int, float*, int, int, int, float*)in tmpxft_0000207c_00000000-26_knn_cuda_without_indexes.o

  "_cuCtxCreate_v2", referenced from:

      knn(float*, int, float*, int, int, int, float*)in tmpxft_0000207c_00000000-26_knn_cuda_without_indexes.o

  "_cuMemGetInfo_v2", referenced from:

      knn(float*, int, float*, int, int, int, float*)in tmpxft_0000207c_00000000-26_knn_cuda_without_indexes.o

  "_cuCtxDetach", referenced from:

      knn(float*, int, float*, int, int, int, float*)in tmpxft_0000207c_00000000-26_knn_cuda_without_indexes.o

ld: symbol(s) not found for architecture i386

collect2: ld returned 1 exit status

Change -L/usr/local/cuda/lib64 to -L/usr/local/cuda/lib.

The opencv library is linking the driver api, so you need to change the library linked. Use -lcuda instead of -lcudart.

It seems like your OpenCV library is expecting 64bit objects. If this is the case, you may want to add a -m64 flag to nvcc and g++.

Same errors.

If I change it to use -m64 like this:

nvcc -c knn_cuda_without_indexes.cu -m64

g++ -o sample ImageFeatures.cpp knn_cuda_without_indexes.o `pkg-config --cflags --libs opencv` -lopencv_gpu -I/usr/local/cuda/include -L/usr/local/cuda/lib -lcuda -m64

I get the following errors.

Undefined symbols for architecture x86_64:

  "_cudaGetErrorString", referenced from:

      printErrorMessage(cudaError, int) in knn_cuda_without_indexes.o

  "_cudaCreateChannelDesc", referenced from:

      cudaChannelFormatDesc cudaCreateChannelDesc<float>()  in knn_cuda_without_indexes.o

  "___cudaRegisterFatBinary", referenced from:

      __sti____cudaRegisterAll_59_tmpxft_00002264_00000000_4_knn_cuda_without_indexes_cpp1_ii_texA()      in knn_cuda_without_indexes.o

  "___cudaRegisterFunction", referenced from:

      __sti____cudaRegisterAll_59_tmpxft_00002264_00000000_4_knn_cuda_without_indexes_cpp1_ii_texA()      in knn_cuda_without_indexes.o

  "___cudaRegisterTexture", referenced from:

      __sti____cudaRegisterAll_59_tmpxft_00002264_00000000_4_knn_cuda_without_indexes_cpp1_ii_texA()      in knn_cuda_without_indexes.o

  "___cudaUnregisterFatBinary", referenced from:

      __cudaUnregisterBinaryUtil()      in knn_cuda_without_indexes.o

  "_cudaLaunch", referenced from:

      cudaError cudaLaunch<char>(char*)in knn_cuda_without_indexes.o

  "_cudaSetupArgument", referenced from:

      __device_stub__Z14cuParallelSqrtPfi(float*, int)in knn_cuda_without_indexes.o

      __device_stub__Z15cuInsertionSortPfiiii(float*, int, int, int, int)in knn_cuda_without_indexes.o

      __device_stub__Z23cuComputeDistanceGlobalPfiiS_iiiS_(float*, int, int, float*, int, int, int, float*)in knn_cuda_without_indexes.o

      __device_stub__Z24cuComputeDistanceTextureiPfiiiS_(int, float*, int, int, int, float*)in knn_cuda_without_indexes.o

  "_cudaBindTextureToArray", referenced from:

      cudaError cudaBindTextureToArray<float, 2, (cudaTextureReadMode)0>(texture<float, 2, (cudaTextureReadMode)0> const&, cudaArray const*, cudaChannelFormatDesc const&)in knn_cuda_without_indexes.o

  "_cudaGetChannelDesc", referenced from:

      cudaError cudaBindTextureToArray<float, 2, (cudaTextureReadMode)0>(texture<float, 2, (cudaTextureReadMode)0> const&, cudaArray const*)in knn_cuda_without_indexes.o

  "_cudaMallocPitch", referenced from:

      knn(float*, int, float*, int, int, int, float*)in knn_cuda_without_indexes.o

  "_cudaMallocArray", referenced from:

      knn(float*, int, float*, int, int, int, float*)in knn_cuda_without_indexes.o

  "_cudaFree", referenced from:

      knn(float*, int, float*, int, int, int, float*)in knn_cuda_without_indexes.o

  "_cudaMemcpyToArray", referenced from:

      knn(float*, int, float*, int, int, int, float*)in knn_cuda_without_indexes.o

  "_cudaMemcpy2D", referenced from:

      knn(float*, int, float*, int, int, int, float*)in knn_cuda_without_indexes.o

  "_cudaConfigureCall", referenced from:

      knn(float*, int, float*, int, int, int, float*)in knn_cuda_without_indexes.o

  "_cudaFreeArray", referenced from:

      knn(float*, int, float*, int, int, int, float*)in knn_cuda_without_indexes.o

ld: symbol(s) not found for architecture x86_64

collect2: ld returned 1 exit status