I wrote a program to compare the performance of SURF with and without CUDA. The CUDA implementation resulted in a speedup of only 1.25x.
The following is my code.
while (true)
{
stream1.read(Frame_1);
stream1.read(Frame_2);
cvtColor(Frame_1,Frame_1,CV_BGR2GRAY);
cvtColor(Frame_2,Frame_2,CV_BGR2GRAY);
float t1,t2;
t1 = getTickCount();
Ptr<xfeatures2d::SURF> surf = xfeatures2d::SURF::create();
Mat descriptor_1,descriptor_2;
vector<KeyPoint> keypoint_1,keypoint_2;
surf->detect(Frame_1,keypoint_1);
surf->compute(Frame_1,keypoint_1,descriptor_1);
surf->detect(Frame_2,keypoint_2);
surf->compute(Frame_2,keypoint_2,descriptor_2);
Ptr<DescriptorMatcher> dscMatcher = DescriptorMatcher::create("BruteForce");
vector< vector< DMatch > > matches;
dscMatcher->knnMatch(descriptor_1,descriptor_2,matches,2);
t1 = getTickCount() - t1;
t2 = getTickCount();
cuda::SURF_CUDA surf_cuda ;
cuda::GpuMat key_1_GPU,key_2_GPU,desc_1_GPU,desc_2_GPU,img_1,img_2;
img_1.upload(Frame_1);
img_2.upload(Frame_2);
surf_cuda(img_1,cuda::GpuMat(),key_1_GPU,desc_1_GPU);
surf_cuda(img_2,cuda::GpuMat(),key_2_GPU,desc_2_GPU);
surf_cuda.downloadKeypoints(key_1_GPU,keypoint_1);
surf_cuda.downloadKeypoints(key_2_GPU,keypoint_2);
Ptr<cuda::DescriptorMatcher> dscMatcher1 = cuda::DescriptorMatcher::createBFMatcher();
vector< vector< DMatch > > matches1;
dscMatcher1->knnMatch(desc_1_GPU,desc_2_GPU,matches1,2);
t2 = getTickCount() - t2;
cout << "No cuda : " << t1/getTickFrequency() << " With Cuda : "<< t2/getTickFrequency() << endl;
if (waitKey(30) >= 0)
break;
}
On average, the measured times were as follows: without CUDA it took 0.54 seconds, and with CUDA it took 0.43 seconds. I am running the code on an NVIDIA Jetson TX2. The images that I am processing have a size of 900 x 1440.
Then I ran the same test with ORB, which resulted in a speedup of only 1.6x.
I am wondering whether the problem is with the code I wrote or with the hardware. Is there a standard test to check whether CUDA is working properly on the board?
Some details
- OS : Ubuntu 16.04
- OpenCV : 3.4
- CUDA : 9.0