I cannot comment about NVIDIA’s evaluation, but it should not be that difficult to apply correction with GPU once you have the opencv correction maps for x and y in float format.
You would first need an opencv version built with CUDA support. Here I’ve been using a 4.2.0 dev version.
This example is a simplified version of nvivafilter plugin. Its sources are available in public_sources.tbz2.
Basically, this example uses a constant 640x480 resolution, so you would declare these constants and variables:
#include "opencv2/core.hpp"
#include "opencv2/calib3d.hpp"
#include "opencv2/cudawarping.hpp"
/* Fixed frame size of this example; the remap tables are generated at this resolution. */
const int max_width = 640;
const int max_height = 480;
/* Device-side x / y remap tables: uploaded once in init(), read by cv::cuda::remap() for every frame. */
static cv::cuda::GpuMat gpu_xmap, gpu_ymap;
In the init() function you would fill in your xmap and ymap (load your own maps however you prefer):
init (CustomerFunction * pFuncs)
{
pFuncs->fPreProcess = pre_process;
pFuncs->fGPUProcess = gpu_process;
pFuncs->fPostProcess = post_process;
/* Initialize maps from CPU. */
cv::Mat xmap(max_height, max_width, CV_32FC1);
cv::Mat ymap(max_height, max_width, CV_32FC1);
//fill matrices with your values
cv::Mat cam(3, 3, cv::DataType<float>::type);
cam.at<float>(0, 0) = 528.53618582196384f;
cam.at<float>(0, 1) = 0.0f;
cam.at<float>(0, 2) = 314.01736116032430f;
cam.at<float>(1, 0) = 0.0f;
cam.at<float>(1, 1) = 532.01912214324500f;
cam.at<float>(1, 2) = 231.43930864205211f;
cam.at<float>(2, 0) = 0.0f;
cam.at<float>(2, 1) = 0.0f;
cam.at<float>(2, 2) = 1.0f;
cv::Mat dist(4, 1, cv::DataType<float>::type);
dist.at<float>(0, 0) = -0.11839989180635836f;
dist.at<float>(1, 0) = 0.25425420873955445f;
dist.at<float>(2, 0) = 0.0013269901775205413f;
dist.at<float>(3, 0) = 0.0015787467748277866f;
cv::fisheye::initUndistortRectifyMap(cam, dist, cv::Mat(), cam, cv::Size(max_width, max_height), CV_32FC1, xmap, ymap);
/* upload to GpuMats */
gpu_xmap.upload(xmap);
gpu_ymap.upload(ymap);
}
Once this is done, it’s ready for remapping frames. You would process each frame this way:
static void cv_process_RGBA(void *pdata, int32_t width, int32_t height)
{
cv::cuda::GpuMat d_Mat_RGBA(height, width, CV_8UC4, pdata);
cv::cuda::GpuMat d_Mat_RGBA_Src;
d_Mat_RGBA.copyTo(d_Mat_RGBA_Src); // cannot avoid one copy
cv::cuda::remap(d_Mat_RGBA_Src, d_Mat_RGBA, gpu_xmap, gpu_ymap, cv::INTER_CUBIC, cv::BORDER_CONSTANT, cv::Scalar(0.f, 0.f, 0.f, 0.f));
// Check
if(d_Mat_RGBA.data != pdata)
std::cerr << "Error reallocated buffer for d_Mat_RGBA" << std::endl;
}
Last thing would be to call this processing when an RGBA (or ABGR) frame is received. In function gpu_process(), you would change the relevant section to:
/* Only pitch-type frames are handled; their first plane is read through frame.pPitch[0]. */
if (eglFrame.frameType == CU_EGL_FRAME_TYPE_PITCH) {
if (eglFrame.eglColorFormat == CU_EGL_COLOR_FORMAT_ABGR) {
/* The only colour format that is actually remapped. */
cv_process_RGBA(eglFrame.frame.pPitch[0], eglFrame.width, eglFrame.height);
} else if (eglFrame.eglColorFormat == CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR) {
/* NV12 frames are reported and left unprocessed. */
printf ("Invalid eglcolorformat NV12\n");
} else
/* Any other format: report its numeric code and leave the frame unprocessed. */
printf ("Invalid eglcolorformat %d\n", eglFrame.eglColorFormat);
}
Note that older L4T releases used CU_EGL_COLOR_FORMAT_BGRA here instead, and the enum codes changed, so the built library is not binary compatible between L4T versions.
Adapt the makefile to your opencv install directory :
# Header search path inside the OpenCV install prefix
CVCCFLAGS:=-I$(OPENCV_DIR)/include/opencv4
# Library search path plus the three OpenCV modules whose headers the source includes
CVLDFLAGS:=-L$(OPENCV_DIR)/lib -lopencv_core -lopencv_calib3d -lopencv_cudawarping
Build with make and test with:
# OPENCV_DIR is a Makefile variable; the shell needs the same install prefix unless it is already exported.
export OPENCV_DIR=${OPENCV_DIR:-/usr/local/opencv-github-4.2.0-dev}
# Put the CUDA and OpenCV libraries first while keeping anything already on the search path.
export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$OPENCV_DIR/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
gst-launch-1.0 videotestsrc ! video/x-raw, width=640, height=480, framerate=30/1 ! nvvidconv ! 'video/x-raw(memory:NVMM), format=NV12, width=640, height=480' ! nvivafilter customer-lib-name=./lib-gst-custom-opencv_cudaprocess.so cuda-process=true ! 'video/x-raw(memory:NVMM), format=RGBA, width=640, height=480' ! nvoverlaysink
Attachments
Main source, to be saved as gst-custom-opencv_cudaprocess.cu:
/*
* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <cuda.h>
#include "opencv2/core.hpp"
#include "opencv2/calib3d.hpp"
#include "opencv2/cudawarping.hpp"
#include "cudaEGL.h"
/* Everything below uses C linkage so the symbol names stay unmangled. */
#if defined(__cplusplus)
/* Declared only; no definition of Handle_EGLImage is visible in this file. */
extern "C" void Handle_EGLImage (EGLImageKHR image);
extern "C" {
#endif
/* Format tag attached to each mapped surface (see the sformat arrays below). */
typedef enum {
COLOR_FORMAT_Y8 = 0,
COLOR_FORMAT_U8_V8,
COLOR_FORMAT_RGBA,
COLOR_FORMAT_NONE
} ColorFormat;
/* Table of callbacks that init() fills in. */
typedef struct {
/**
 * cuda-process API
 *
 * @param image : EGL Image to process
 * @param userPtr : pointer to user-allocated data, must be freed by the user
 */
void (*fGPUProcess) (EGLImageKHR image, void ** userPtr);
/**
 * pre-process API
 *
 * @param sBaseAddr : Mapped Surfaces(YUV) pointers
 * @param smemsize : surfaces size array
 * @param swidth : surfaces width array
 * @param sheight : surfaces height array
 * @param spitch : surfaces pitch array
 * @param sformat : surfaces format array
 * @param nsurfcount : surfaces count
 * @param userPtr : pointer to user-allocated data, must be freed by the user
 */
void (*fPreProcess)(void **sBaseAddr,
unsigned int *smemsize,
unsigned int *swidth,
unsigned int *sheight,
unsigned int *spitch,
ColorFormat *sformat,
unsigned int nsurfcount,
void ** userPtr);
/**
 * post-process API
 *
 * @param sBaseAddr : Mapped Surfaces(YUV) pointers
 * @param smemsize : surfaces size array
 * @param swidth : surfaces width array
 * @param sheight : surfaces height array
 * @param spitch : surfaces pitch array
 * @param sformat : surfaces format array
 * @param nsurfcount : surfaces count
 * @param userPtr : pointer to user-allocated data, must be freed by the user
 */
void (*fPostProcess)(void **sBaseAddr,
unsigned int *smemsize,
unsigned int *swidth,
unsigned int *sheight,
unsigned int *spitch,
ColorFormat *sformat,
unsigned int nsurfcount,
void ** userPtr);
} CustomerFunction;
/* Fills a CustomerFunction table; defined further down in this file. */
void init (CustomerFunction * pFuncs);
#if defined(__cplusplus)
}
#endif
/**
 * Dummy custom pre-process API implementation.
 * It only logs the width, height and size of the first mapped surface;
 * the pixel data is not touched.
 *
 * @param sBaseAddr  : Mapped Surfaces pointers (unused)
 * @param smemsize   : surfaces size array
 * @param swidth     : surfaces width array
 * @param sheight    : surfaces height array
 * @param spitch     : surfaces pitch array (unused)
 * @param sformat    : surfaces format array (unused)
 * @param nsurfcount : surfaces count
 * @param usrptr     : user data pointer (unused)
 */
static void
pre_process (void **sBaseAddr,
             unsigned int *smemsize,
             unsigned int *swidth,
             unsigned int *sheight,
             unsigned int *spitch,
             ColorFormat *sformat,
             unsigned int nsurfcount,
             void ** usrptr)
{
    /* add your custom pre-process here */

    /* Nothing to report when there is no surface or the arrays are missing. */
    if (nsurfcount == 0 || swidth == NULL || sheight == NULL || smemsize == NULL)
        return;

    /* The values are unsigned int, hence %u rather than %d. */
    printf ("pre-process %ux%u size %u\n", *swidth, *sheight, *smemsize);
}
/**
 * Dummy custom post-process API implementation.
 * It only logs the width, height and size of the first mapped surface;
 * the pixel data is not touched.
 *
 * @param sBaseAddr  : Mapped Surfaces pointers (unused)
 * @param smemsize   : surfaces size array
 * @param swidth     : surfaces width array
 * @param sheight    : surfaces height array
 * @param spitch     : surfaces pitch array (unused)
 * @param sformat    : surfaces format array (unused)
 * @param nsurfcount : surfaces count
 * @param usrptr     : user data pointer (unused)
 */
static void
post_process (void **sBaseAddr,
              unsigned int *smemsize,
              unsigned int *swidth,
              unsigned int *sheight,
              unsigned int *spitch,
              ColorFormat *sformat,
              unsigned int nsurfcount,
              void ** usrptr)
{
    /* add your custom post-process here */

    /* Nothing to report when there is no surface or the arrays are missing. */
    if (nsurfcount == 0 || swidth == NULL || sheight == NULL || smemsize == NULL)
        return;

    /* The values are unsigned int, hence %u rather than %d. */
    printf ("post-process %ux%u size %u\n", *swidth, *sheight, *smemsize);
}
static cv::cuda::GpuMat gpu_xmap, gpu_ymap;
static void cv_process_RGBA(void *pdata, int32_t width, int32_t height)
{
cv::cuda::GpuMat d_Mat_RGBA(height, width, CV_8UC4, pdata);
cv::cuda::GpuMat d_Mat_RGBA_Src;
d_Mat_RGBA.copyTo(d_Mat_RGBA_Src); // cannot avoid one copy
cv::cuda::remap(d_Mat_RGBA_Src, d_Mat_RGBA, gpu_xmap, gpu_ymap, cv::INTER_CUBIC, cv::BORDER_CONSTANT, cv::Scalar(0.f, 0.f, 0.f, 0.f));
// Check
if(d_Mat_RGBA.data != pdata)
std::cerr << "Error reallocated buffer for d_Mat_RGBA" << std::endl;
}
/**
* Performs CUDA Operations on egl image.
*
* @param image : EGL image
*/
static void
gpu_process (EGLImageKHR image, void ** usrptr)
{
CUresult status;
CUeglFrame eglFrame;
CUgraphicsResource pResource = NULL;
cudaFree(0);
status = cuGraphicsEGLRegisterImage(&pResource, image, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
if (status != CUDA_SUCCESS) {
printf("cuGraphicsEGLRegisterImage failed : %d \n", status);
return;
}
status = cuGraphicsResourceGetMappedEglFrame( &eglFrame, pResource, 0, 0);
if (status != CUDA_SUCCESS) {
printf ("cuGraphicsSubResourceGetMappedArray failed\n");
}
status = cuCtxSynchronize();
if (status != CUDA_SUCCESS) {
printf ("cuCtxSynchronize failed \n");
}
if (eglFrame.frameType == CU_EGL_FRAME_TYPE_PITCH) {
if (eglFrame.eglColorFormat == CU_EGL_COLOR_FORMAT_ABGR) {
cv_process_RGBA(eglFrame.frame.pPitch[0], eglFrame.width, eglFrame.height);
} else if (eglFrame.eglColorFormat == CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR) {
printf ("Invalid eglcolorformat NV12\n");
} else
printf ("Invalid eglcolorformat %d\n", eglFrame.eglColorFormat);
}
status = cuCtxSynchronize();
if (status != CUDA_SUCCESS) {
printf ("cuCtxSynchronize failed after memcpy \n");
}
status = cuGraphicsUnregisterResource(pResource);
if (status != CUDA_SUCCESS) {
printf("cuGraphicsEGLUnRegisterResource failed: %d \n", status);
}
}
/* Resolution the undistortion maps are generated for. */
const int max_width = 640;
const int max_height = 480;

/**
 * Fills the callback table and uploads the undistortion maps to the GPU.
 *
 * The maps are computed on the CPU from a hard-coded camera matrix and
 * four fisheye distortion coefficients. OpenCV errors are reported on
 * stderr instead of propagating out of this extern "C" function; in that
 * case gpu_xmap / gpu_ymap keep their previous contents.
 *
 * @param pFuncs : callback table to fill in
 */
extern "C" void
init (CustomerFunction * pFuncs)
{
    if (pFuncs == NULL) {
        std::cerr << "init: NULL CustomerFunction table" << std::endl;
        return;
    }

    pFuncs->fPreProcess = pre_process;
    pFuncs->fGPUProcess = gpu_process;
    pFuncs->fPostProcess = post_process;

    try {
        /* 3x3 camera matrix */
        const cv::Mat cam = (cv::Mat_<float>(3, 3) <<
            528.53618582196384f, 0.0f,                314.01736116032430f,
            0.0f,                532.01912214324500f, 231.43930864205211f,
            0.0f,                0.0f,                1.0f);

        /* 4x1 distortion coefficients */
        const cv::Mat dist = (cv::Mat_<float>(4, 1) <<
            -0.11839989180635836f,
            0.25425420873955445f,
            0.0013269901775205413f,
            0.0015787467748277866f);

        /* Initialize maps from CPU; the call allocates both outputs itself. */
        cv::Mat xmap, ymap;
        cv::fisheye::initUndistortRectifyMap(cam, dist, cv::Mat(), cam, cv::Size(max_width, max_height), CV_32FC1, xmap, ymap);

        /* Upload into temporaries first so the globals are only replaced
           once both uploads have succeeded. */
        cv::cuda::GpuMat d_xmap, d_ymap;
        d_xmap.upload(xmap);
        d_ymap.upload(ymap);
        gpu_xmap = d_xmap;
        gpu_ymap = d_ymap;
    } catch (const cv::Exception &e) {
        std::cerr << "init: could not prepare undistortion maps: " << e.what() << std::endl;
    }
}
extern "C" void
deinit (void)
{
}
Makefile:
###############################################################################
#
# Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
###############################################################################
# Location of the CUDA Toolkit
CUDA_PATH ?= /usr/local/cuda
INCLUDE_DIR = /usr/include
LIB_DIR = /usr/lib/aarch64-linux-gnu
TEGRA_LIB_DIR = /usr/lib/aarch64-linux-gnu/tegra
# OpenCV install prefix. Like CUDA_PATH it is only a default: an exported
# OPENCV_DIR or `make OPENCV_DIR=/path/to/opencv` takes precedence.
OPENCV_DIR ?= /usr/local/opencv-github-4.2.0-dev
# For hardfp
#LIB_DIR = /usr/lib/arm-linux-gnueabihf
#TEGRA_LIB_DIR = /usr/lib/arm-linux-gnueabihf/tegra
# Host OS name (upper / lower case), word size and CPU architecture
OSUPPER = $(shell uname -s 2>/dev/null | tr "[:lower:]" "[:upper:]")
OSLOWER = $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
OS_SIZE = $(shell uname -m | sed -e "s/i.86/32/" -e "s/x86_64/64/" -e "s/armv7l/32/")
OS_ARCH = $(shell uname -m | sed -e "s/i386/i686/")
# Host compiler, and nvcc driving it
GCC ?= g++
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(GCC)
# internal flags
# NVCCFLAGS go to nvcc itself; CCFLAGS are forwarded to the host compiler via -Xcompiler below
NVCCFLAGS := --shared -std=c++11
CCFLAGS := -fPIC -std=c++11
# OpenCV header path and the OpenCV modules to link
CVCCFLAGS:=-I$(OPENCV_DIR)/include/opencv4
CVLDFLAGS:=-L$(OPENCV_DIR)/lib -lopencv_core -lopencv_calib3d -lopencv_cudawarping
# Linker flags, forwarded via -Xlinker below
LDFLAGS :=
# Extra user flags
EXTRA_NVCCFLAGS ?=
EXTRA_LDFLAGS ?=
EXTRA_CCFLAGS ?=
# Default ABI: 64-bit ARM
override abi := aarch64
LDFLAGS += --dynamic-linker=/lib/ld-linux-aarch64.so.1
# For hardfp
#override abi := gnueabihf
#LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
#CCFLAGS += -mfloat-abi=hard
# Extra flags when ARMv7=1 is given; the sysroot settings apply only if TARGET_FS is set
ifeq ($(ARMv7),1)
NVCCFLAGS += -target-cpu-arch ARM
ifneq ($(TARGET_FS),)
CCFLAGS += --sysroot=$(TARGET_FS)
LDFLAGS += --sysroot=$(TARGET_FS)
LDFLAGS += -rpath-link=$(TARGET_FS)/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/$(abi)-linux-gnu
# For hardfp
#LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-$(abi)
endif
endif
# Debug build flags
# `make dbg=1` adds -g -G and sets TARGET to debug
dbg = 0
ifeq ($(dbg),1)
NVCCFLAGS += -g -G
TARGET := debug
else
TARGET := release
endif
# Final compile flags: nvcc flags first, then host-compiler flags each prefixed with -Xcompiler
ALL_CCFLAGS :=
ALL_CCFLAGS += $(NVCCFLAGS)
ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
# Final link flags: all compile flags plus linker flags each prefixed with -Xlinker
ALL_LDFLAGS :=
ALL_LDFLAGS += $(ALL_CCFLAGS)
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
# Common includes and paths for CUDA
INCLUDES := -I./
LIBRARIES := -L$(LIB_DIR) -lEGL -lGLESv2
LIBRARIES += -L$(TEGRA_LIB_DIR) -lcuda -lrt
################################################################################
# CUDA code generation flags
ifneq ($(OS_ARCH),armv7l)
GENCODE_SM10 := -gencode arch=compute_10,code=sm_10
endif
GENCODE_SM20 := -gencode arch=compute_20,code=sm_20
GENCODE_SM30 := -gencode arch=compute_30,code=sm_30
GENCODE_SM32 := -gencode arch=compute_32,code=sm_32
GENCODE_SM35 := -gencode arch=compute_35,code=sm_35
GENCODE_SM50 := -gencode arch=compute_50,code=sm_50
GENCODE_SMXX := -gencode arch=compute_50,code=compute_50
GENCODE_SM53 := -gencode arch=compute_53,code=compute_53 # for TX1 / Nano
GENCODE_SM62 := -gencode arch=compute_62,code=compute_62 # for TX2
GENCODE_SM72 := -gencode arch=compute_72,code=compute_72 # for Xavier
ifeq ($(OS_ARCH),armv7l)
# This only supports TK1(3.2) -like architectures
GENCODE_FLAGS ?= $(GENCODE_SM32)
else
# This only support TX1/Nano(5.3) or TX2(6.2) or Xavier(7.2) -like architectures
# (the variable name must be GENCODE_SM53; a misspelt name expands to nothing
#  and drops the TX1 / Nano target)
GENCODE_FLAGS ?= $(GENCODE_SM53) $(GENCODE_SM62) $(GENCODE_SM72)
endif
# Target rules
# None of these targets produce a file of the same name.
.PHONY: all build clean clobber

all: build

build: lib-gst-custom-opencv_cudaprocess.so

# Compile the CUDA / OpenCV source into an object file.
gst-custom-opencv_cudaprocess.o : gst-custom-opencv_cudaprocess.cu
	$(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(CVCCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<

# Link the shared library that nvivafilter loads.
lib-gst-custom-opencv_cudaprocess.so : gst-custom-opencv_cudaprocess.o
	$(NVCC) $(ALL_LDFLAGS) $(CVLDFLAGS) $(GENCODE_FLAGS) -o $@ $^ $(LIBRARIES)

# -f keeps `make clean` from failing when the files do not exist.
clean:
	rm -f lib-gst-custom-opencv_cudaprocess.so gst-custom-opencv_cudaprocess.o

clobber: clean