Hi All,
I’m trying to process video images with multiple streams.
I see only a small improvement on my cloud GPU:
vc@atropos:/$ lspci | grep -i nvidia
00:03.0 VGA compatible controller: NVIDIA Corporation GK104GL [GRID K520] (rev a1)
Here is the code. Could someone run it and tell me what times they get?
I’m getting:
processing video_small_01.mp4 streams 2
1M resize on video_small_01.mp4 finished on 25.7128
10k resize on video_small_01.mp4 finished on 0.002545
===================
Compilation and run with:
#export DYLD_LIBRARY_PATH="$DYLD_LIBRARY_PATH:/usr/local/boost/lib"
#export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/boost/lib"
g++ test_decoding.cpp -o test_decoding `pkg-config --cflags --libs opencv` \
-I/usr/local/cuda/include/ -I/usr/local/boost/include -L/usr/local/cuda/lib64/ -L/usr/local/boost/lib/ -lboost_thread -lboost_system
#./test_decoding video_small_01.mp4 video_small_02.mp4 video_small_03.mp4 video_small_04.mp4
./test_decoding video_small_01.mp4 2
NOTE: Replace video_small_01.mp4 with the path to a real video file.
=================== test_decoding.cpp ===================
//
// g++ test_CV.cpp -o test_CV `pkg-config --cflags --libs opencv`
// -I/usr/local/cuda/include/ -L/usr/local/cuda/lib64/
//
#include <climits>
#include <cstddef>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <string>
#include <vector>

#include <boost/thread.hpp>

#include <opencv/cv.h>
#include <opencv/highgui.h>
#include <opencv/ml.h>
#include <opencv/cxcore.h>
#include "opencv2/opencv.hpp"
#include "opencv2/cudaimgproc.hpp"
#include "opencv2/cudawarping.hpp"
#include "opencv2/cudaobjdetect.hpp"
#include "opencv2/cudafilters.hpp"
#include "opencv2/cudacodec.hpp"
#include "opencv2/cudaarithm.hpp"
// Global pool of CUDA streams used by the benchmark. processfilter() issues
// its resize calls on s[i % sn + 1], i.e. on s[1]..s[sn]; s[0] is not used by
// the code in this file, and the stream count sn must stay below 100.
// NOTE(review): these are default-constructed at static-initialisation time,
// before main() runs - confirm that is acceptable for the CUDA runtime in use.
cv::cuda::Stream s[100];
// Disabled alternative: a global array of video readers instead of a local one.
//cv::Ptr<cv::cudacodec::VideoReader> GpuCap[4]; //= cv::cudacodec::createVideoReader();
void processfilter(std::string fname, int sn)
{
int streams = sn;
cv::cuda::GpuMat frames[1000];
cv::cuda::GpuMat outresize[2*sn];
cv::Size ksize(256,256);
cv::Ptrcv::cudacodec::VideoReader GpuCap = cv::cudacodec::createVideoReader(fname);
clock_t full_time;
for (int i=0; i<1000; i++){
GpuCap->nextFrame(frames[i]);
}
full_time = clock();
for (int j=0; j<1000; j++){
for (int i=0; i<1000; i++){
cv::cuda::resize(frames[i], outresize[i%sn], ksize,0, 0, cv::INTER_LINEAR, s[i%sn+1]);
}
}
std::cout << "1M resize on "<< fname<< " finished on " << float(clock()-full_time) / CLOCKS_PER_SEC << std::endl;
full_time = clock();
for (int j=0; j<10; j++){
for (int i=0; i<10; i++){
cv::cuda::resize(frames[i], outresize[i%sn], ksize,0, 0, cv::INTER_LINEAR, s[i%sn+1]);
}
}
std::cout << "10k resize on "<< fname<< " finished on " << float(clock()-full_time) / CLOCKS_PER_SEC << std::endl;
}
// Entry point: test_decoding <video file> <stream count>
//
// Validates the command line, echoes the chosen file and stream count, and
// runs processfilter() on them. Returns 0 after the benchmark call, or 1 when
// the arguments are missing or the stream count is not a positive integer.
int main( int argc, char** argv )
{
    if (argc < 3) {
        std::cerr << "usage: " << (argc > 0 ? argv[0] : "test_decoding")
                  << " <video file> <stream count>" << std::endl;
        return 1;
    }

    // strtol, unlike atoi, lets us reject non-numeric or trailing garbage
    // instead of silently treating it as 0.
    char* end = NULL;
    const long parsed = std::strtol(argv[2], &end, 10);
    if (end == argv[2] || *end != '\0' || parsed < 1 || parsed > INT_MAX) {
        std::cerr << "stream count must be a positive integer, got '"
                  << argv[2] << "'" << std::endl;
        return 1;
    }
    const int sn = static_cast<int>(parsed);

    std::cout<<"processing " << argv[1]<<" streams "<< sn <<std::endl;
    processfilter(argv[1], sn );
    return 0;
}