Running environment: JetPack 3.1 on a Jetson TX2.
I push frames to the GStreamer element nveglstreamsrc via an EGL stream in FIFO mode. After setting the pipeline to the PLAYING state and connecting the CUDA producer to the EGL stream, I push 4 frame buffers into the FIFO. After that, the producer thread starts and pushes a new frame every 40 milliseconds.
When pushing a new frame, I first call cuEGLStreamProducerReturnFrame to get a returned frame from the EGL stream, copy the data into that frame's buffer, and then call cuEGLStreamProducerPresentFrame to push the frame back into the EGL stream FIFO. I print the address of each returned frame and found that the frames come back out of order: I push 4 frames at the beginning, but only the first 2 frames are returned, over and over.
Here is the test log:
CUDA producer initializing EGL display.
EGL API: 1.5
CUDA producer initializing EGL stream.
EGL Stream consumer - Mode: FIFO, Length: 4, latency 0.
NvEglStreamSrcInitializeEgl: Load library: libEGL.so
Connect EGL stream to cuda producer.
CUDA producer present frame: 0xa48940.
CUDA producer present frame: 0xa4a880.
CUDA producer present frame: 0xa4bc00.
CUDA producer present frame: 0xa4cf80.
Present a new frame 1.
CUDA producer return frame: 0xa48940.
Present a new frame 2.
CUDA producer return frame: 0xa4a880.
Present a new frame 3.
CUDA producer return frame: 0xa48940.
Present a new frame 4.
CUDA producer return frame: 0xa4a880.
Present a new frame 5.
CUDA producer return frame: 0xa48940.
Present a new frame 6.
CUDA producer return frame: 0xa4a880.
Present a new frame 7.
CUDA producer return frame: 0xa48940.
Present a new frame 8.
CUDA producer return frame: 0xa4a880.
Present a new frame 9.
CUDA producer return frame: 0xa48940.
Present a new frame 10.
CUDA producer return frame: 0xa4a880.
Terminate EGL display.
The test code is below:
main.cpp
#include <thread>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <gst/gst.h>
#include <cuda_runtime.h>
#include "eglstreamproducer.h"
static const int FrameWidth = 800;
static const int FrameHeight = 600;
static EGLStreamProducer *eglStreamProducer = nullptr;
void producerThreadFunc()
{
if (cudaFree(nullptr) != cudaSuccess) {
printf("Failed to initialize CUDA context.\n");
return;
}
CUdeviceptr buffer;
CUresult ret = cuMemAlloc(&buffer, FrameWidth * FrameHeight * 3 / 2);
if (ret != CUDA_SUCCESS) {
g_print("cuMemAlloc failed: %d\n.", ret);
return;
}
int cnt = 0;
while (cnt < 50) {
std::this_thread::sleep_for(std::chrono::milliseconds(40));
cnt++;
g_print("Present a new frame %d.\n", cnt);
// call cuEGLStreamProducerReturnFrame to get the returned frame from EGL stream,
// and then call cuEGLStreamProducerPresentFrame to push the frame back to the EGL stream FIFO.
eglStreamProducer->presentFrame(buffer);
}
cuMemFree(buffer);
}
int main(int argc, char *argv[])
{
gst_init(nullptr, nullptr);
GstElement *pipeline = gst_pipeline_new("play");
if (pipeline == nullptr) {
g_print("Create pipeline failed.\n");
return -1;
}
GstElement *source = gst_element_factory_make("nveglstreamsrc", nullptr);
if (source == nullptr) {
g_print("Create eglstream source failed.\n");
return -1;
}
eglStreamProducer = new EGLStreamProducer(4, 0, FrameWidth, FrameHeight);
g_object_set(source, "display", eglStreamProducer->getEGLDisplay(), nullptr);
g_object_set(source, "eglstream", eglStreamProducer->getEGLStream(), nullptr);
GstElement *capFilter = gst_element_factory_make("capsfilter", nullptr);
if (capFilter == nullptr) {
g_print("Create capsfilter failed.\n");
return -1;
}
GstCaps *caps = gst_caps_new_simple("video/x-raw", "format", G_TYPE_STRING, "NV12",
"width", G_TYPE_INT, FrameWidth,
"height", G_TYPE_INT, FrameHeight,
"framerate", GST_TYPE_FRACTION, 25, 1, NULL);
GstCapsFeatures *feature = gst_caps_features_new("memory:NVMM", NULL);
gst_caps_set_features(caps, 0, feature);
/* Set capture caps on capture filter */
g_object_set(capFilter, "caps", caps, NULL);
gst_caps_unref(caps);
GstElement *sink = gst_element_factory_make("fakesink", nullptr);
if (sink == nullptr) {
g_print("Create overlay sink failed.\n");
return -1;
}
gst_bin_add_many(GST_BIN(pipeline), source, capFilter, sink, nullptr);
if (!gst_element_link_many(source, capFilter, sink, nullptr)) {
g_print("Link elememt eglstream source <-> overlay sink failed.\n");
return -1;
}
GstStateChangeReturn ret = gst_element_set_state(pipeline, GST_STATE_PLAYING);
if (ret == GST_STATE_CHANGE_FAILURE) {
g_print("Change pipeline state to %s failed.\n", gst_element_state_get_name(GST_STATE_PLAYING));
return -1;
}
if (!eglStreamProducer->connectEGLProducer()) {
g_print("Connect EGL stream cuda producer failed.\n");
return -1;
}
// Firstly, call cuEGLStreamProducerPresentFrame to push 4 frame buffers to the EGL stream FIFO.
eglStreamProducer->presentFrameBuffers(4);
// start the cuda producer
std::thread t = std::thread(producerThreadFunc);
t.join();
gst_element_set_state(pipeline, GST_STATE_NULL);
gst_object_unref(pipeline);
delete eglStreamProducer;
return 0;
}
eglstreamproducer.h
#ifndef EGLSTREAMPRODUCER_H
#define EGLSTREAMPRODUCER_H
#include <EGL/egl.h>
#include <EGL/eglext.h>
#include <cudaEGL.h>
// CUDA-side producer for an EGL stream.
//
// The constructor obtains the EGL display and creates the stream; the caller
// then hands the display/stream handles to the consumer, calls
// connectEGLProducer(), seeds the stream with presentFrameBuffers() and feeds
// data with presentFrame(). The destructor disconnects the CUDA producer and
// destroys the stream.
//
// NOTE(review): the class is implicitly copyable while its destructor releases
// the handles it holds; copying an instance would release them twice.
class EGLStreamProducer
{
public:
    // fifoLength > 0 requests FIFO mode with that length, otherwise mailbox
    // mode. latency is applied to the stream's consumer latency and acquire
    // timeout attributes. width/height are the frame dimensions in pixels.
    // Failures are only logged; check the getters for EGL_NO_DISPLAY /
    // EGL_NO_STREAM_KHR afterwards.
    EGLStreamProducer(int fifoLength, int latency, int width, int height);
    ~EGLStreamProducer();

    // EGL display in use, or EGL_NO_DISPLAY if initialization failed.
    EGLDisplay getEGLDisplay() const {
        return display;
    }

    // EGL stream handle, or EGL_NO_STREAM_KHR if creation failed.
    EGLStreamKHR getEGLStream() const {
        return stream;
    }

    // Connects the CUDA producer to the stream. Returns false on failure.
    bool connectEGLProducer();

    // Creates bufferNum three-plane frames and presents each to the stream.
    // Returns 0 on success, -1 on failure.
    int presentFrameBuffers(int bufferNum);

    // Takes a frame back from the stream, copies the planar data at `data`
    // into it and presents it again. Returns 0 on success, -1 on failure.
    int presentFrame(CUdeviceptr data);

private:
    bool initEGLDisplay();
    bool initEGLStream();
    void finalizeEGLStream();
    void finalizeEGLCudaProducer();

    // Default initializers keep every handle in a well-defined "none" state,
    // which the finalize*() helpers test before releasing anything.
    EGLDisplay display = EGL_NO_DISPLAY;
    EGLStreamKHR stream = EGL_NO_STREAM_KHR;
    int fifoLength = 0;
    bool fifoMode = false;
    int latency = 0;
    int width = 0;
    int height = 0;
    CUeglStreamConnection cudaConnection = nullptr;
};
#endif // EGLSTREAMPRODUCER_H
eglstreamproducer.cpp
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <cuda_runtime.h>
#include "eglstreamproducer.h"
#include "EGLAPIAccessors.hpp"
// Stores the stream parameters, then obtains the EGL display and creates the
// EGL stream.
//
// fifoLength > 0 selects FIFO mode, anything else mailbox mode. Failures are
// reported on stdout only; afterwards `display` / `stream` keep their
// EGL_NO_DISPLAY / EGL_NO_STREAM_KHR values.
//
// cudaConnection is explicitly set to nullptr: the destructor tests it in
// finalizeEGLCudaProducer(), so it must have a defined value even when
// connectEGLProducer() is never called or fails.
EGLStreamProducer::EGLStreamProducer(int fifoLength, int latency, int width, int height)
    : display(EGL_NO_DISPLAY),
      stream(EGL_NO_STREAM_KHR),
      fifoLength(fifoLength),
      fifoMode(fifoLength > 0),
      latency(latency),
      width(width),
      height(height),
      cudaConnection(nullptr)
{
    printf("CUDA producer initializing EGL display.\n");
    if (!initEGLDisplay()) {
        printf("Cannot initialize EGL display.\n");
        return;
    }

    printf("CUDA producer initializing EGL stream.\n");
    if (!initEGLStream()) {
        printf("Cannot initialize EGL Stream.\n");
        return;
    }
}
// Releases the producer's resources: first disconnects the CUDA producer
// (if one is connected), then destroys the EGL stream (if one was created).
EGLStreamProducer::~EGLStreamProducer()
{
finalizeEGLCudaProducer();
finalizeEGLStream();
}
bool EGLStreamProducer::connectEGLProducer()
{
printf("Connect EGL stream to cuda producer.\n");
if (cudaFree(nullptr) != cudaSuccess) {
printf("Failed to initialize CUDA context.\n");
return false;
}
CUresult ret = cuEGLStreamProducerConnect(&cudaConnection, stream, width, height);
if (ret != CUDA_SUCCESS) {
printf("Connect CUDA producer ERROR %d.\n", ret);
return false;
}
return true;
}
int EGLStreamProducer::presentFrameBuffers(int bufferNum)
{
CUresult ret;
if (cudaFree(nullptr) != cudaSuccess) {
printf("Failed to initialize CUDA context.\n");
return -1;
}
for (int i = 0; i < bufferNum; i++) {
CUarray cudaArr[3] = {0};
CUDA_ARRAY3D_DESCRIPTOR desc = {0};
desc.Format = CU_AD_FORMAT_UNSIGNED_INT8;
desc.Depth = 1;
desc.NumChannels = 1;
desc.Flags = CUDA_ARRAY3D_SURFACE_LDST;
for (int i = 0; i < 3; i++) {
if (i == 0) {
desc.Width = width;
desc.Height = height;
} else {
desc.Width = width / 2;
desc.Height = height / 2;
}
ret = cuArray3DCreate(&cudaArr[i], &desc);
if (ret != CUDA_SUCCESS) {
printf("CUDA create 3D array failed: %d.\n", ret);
return -1;
}
}
CUeglFrame eglFrame;
eglFrame.planeCount = 3;
eglFrame.numChannels = 1;
eglFrame.width = width;
eglFrame.height = height;
eglFrame.depth = 1;
eglFrame.pitch = 0;
eglFrame.cuFormat = CU_AD_FORMAT_UNSIGNED_INT8;
eglFrame.eglColorFormat = CU_EGL_COLOR_FORMAT_YUV420_PLANAR;
eglFrame.frameType = CU_EGL_FRAME_TYPE_ARRAY;
eglFrame.frame.pArray[0] = cudaArr[0];
eglFrame.frame.pArray[1] = cudaArr[1];
eglFrame.frame.pArray[2] = cudaArr[2];
printf("CUDA producer present frame: %p.\n", eglFrame.frame.pArray[0]);
CUresult ret = cuEGLStreamProducerPresentFrame(&cudaConnection, eglFrame, nullptr);
if (ret != CUDA_SUCCESS) {
printf("CUDA producer present frame failed: %d.\n", ret);
return -1;
}
}
return 0;
}
// Takes one frame back from the EGL stream, fills its three plane arrays from
// the linear device buffer `data`, and presents the frame to the stream again.
//
// `data` is read as three consecutive, tightly packed 8-bit planes:
//   plane 0: width x height              at offset 0
//   plane 1: (width/2) x (height/2)      at offset width*height
//   plane 2: (width/2) x (height/2)      at offset width*height + width*height/4
//
// Returns 0 on success and -1 on failure (a message is printed).
// NOTE(review): when a plane copy fails, the frame obtained from the stream is
// not presented again before returning.
int EGLStreamProducer::presentFrame(CUdeviceptr data)
{
    if (cudaFree(nullptr) != cudaSuccess) {
        printf("Failed to initialize CUDA context.\n");
        return -1;
    }

    CUeglFrame eglFrame;
    CUresult ret = cuEGLStreamProducerReturnFrame(&cudaConnection, &eglFrame, nullptr);
    if (ret != CUDA_SUCCESS) {
        printf("CUDA producer return frame failed: %d.\n", ret);
        return -1;
    }
    printf("CUDA producer return frame: %p.\n", eglFrame.frame.pArray[0]);

    const int kPlaneCount = 3;
    const size_t offsets[kPlaneCount] = {
        0,
        static_cast<size_t>(width * height),
        static_cast<size_t>(width * height + width * height / 4)
    };
    const size_t copyWidth[kPlaneCount] = {
        static_cast<size_t>(width),
        static_cast<size_t>(width / 2),
        static_cast<size_t>(width / 2)
    };
    const size_t copyHeight[kPlaneCount] = {
        static_cast<size_t>(height),
        static_cast<size_t>(height / 2),
        static_cast<size_t>(height / 2)
    };

    for (int plane = 0; plane < kPlaneCount; ++plane) {
        CUDA_MEMCPY3D cpdesc;
        memset(&cpdesc, 0, sizeof(cpdesc));

        cpdesc.srcMemoryType = CU_MEMORYTYPE_DEVICE;
        cpdesc.srcDevice = data + offsets[plane];
        // Describe the linear source layout explicitly (rows are tightly
        // packed) instead of leaving pitch and height at zero.
        cpdesc.srcPitch = copyWidth[plane];
        cpdesc.srcHeight = copyHeight[plane];

        cpdesc.dstMemoryType = CU_MEMORYTYPE_ARRAY;
        cpdesc.dstArray = eglFrame.frame.pArray[plane];

        // All planes hold one byte per element, so width in bytes == width.
        cpdesc.WidthInBytes = copyWidth[plane];
        cpdesc.Height = copyHeight[plane];
        cpdesc.Depth = 1;

        ret = cuMemcpy3D(&cpdesc);
        if (ret != CUDA_SUCCESS) {
            printf("CUDA producer copy data to EGL frame failed: %d.\n", ret);
            return -1;
        }
    }

    ret = cuEGLStreamProducerPresentFrame(&cudaConnection, eglFrame, nullptr);
    if (ret != CUDA_SUCCESS) {
        printf("CUDA producer present frame failed: %d.\n", ret);
        return -1;
    }
    return 0;
}
// Fetches the process-wide EGL display from EGLDisplayAccessor and stores it
// in `display`. Returns false (after logging) when no display is available.
bool EGLStreamProducer::initEGLDisplay()
{
    display = EGLDisplayAccessor::getInstance();
    if (display != EGL_NO_DISPLAY) {
        return true;
    }

    printf("Obtain EGL display failed.\n");
    return false;
}
bool EGLStreamProducer::initEGLStream()
{
const EGLint streamAttrMailboxMode[] = { EGL_NONE };
const EGLint streamAttrFIFOMode[] = { EGL_STREAM_FIFO_LENGTH_KHR, fifoLength, EGL_NONE };
if (!setupEGLExtensions()) {
return false;
}
stream = eglCreateStreamKHR(display, fifoMode ? streamAttrFIFOMode : streamAttrMailboxMode);
if (stream == EGL_NO_STREAM_KHR) {
printf("Couldn't create stream.\n");
return false;
}
if (!eglStreamAttribKHR(display, stream, EGL_CONSUMER_LATENCY_USEC_KHR, latency)) {
printf("Producer: streamAttribKHR EGL_CONSUMER_LATENCY_USEC_KHR failed.\n");
}
if (!eglStreamAttribKHR(display, stream, EGL_CONSUMER_ACQUIRE_TIMEOUT_USEC_KHR, latency)) {
printf("Producer: streamAttribKHR EGL_CONSUMER_ACQUIRE_TIMEOUT_USEC_KHR failed.\n");
}
// Get stream attributes
if (!eglQueryStreamKHR(display, stream, EGL_STREAM_FIFO_LENGTH_KHR, &fifoLength)) {
printf("Producer: eglQueryStreamKHR EGL_STREAM_FIFO_LENGTH_KHR failed.\n");
}
if (!eglQueryStreamKHR(display, stream, EGL_CONSUMER_LATENCY_USEC_KHR, &latency)) {
printf("Producer: eglQueryStreamKHR EGL_CONSUMER_LATENCY_USEC_KHR failed.\n");
}
if (fifoMode != (fifoLength > 0)) {
printf("EGL Stream consumer - Unable to set FIFO mode.\n");
fifoMode = false;
}
if (fifoMode) {
printf("EGL Stream consumer - Mode: FIFO, Length: %d, latency %d.\n", fifoLength, latency);
} else {
printf("EGL Stream consumer - Mode: Mailbox.\n");
}
return true;
}
// Destroys the EGL stream if one exists and resets the handle so that a
// second call does nothing.
void EGLStreamProducer::finalizeEGLStream()
{
    if (stream == EGL_NO_STREAM_KHR) {
        return;
    }

    eglDestroyStreamKHR(display, stream);
    stream = EGL_NO_STREAM_KHR;
}
void EGLStreamProducer::finalizeEGLCudaProducer()
{
if (cudaConnection) {
if (cudaFree(nullptr) != cudaSuccess) {
printf("Failed to initialize CUDA context.\n");
return;
}
cuEGLStreamProducerDisconnect(&cudaConnection);
cudaConnection = nullptr;
}
}
EGLAPIAccessors.hpp
#ifndef EGLAPIACCESSORS_HPP
#define EGLAPIACCESSORS_HPP
#include <EGL/egl.h>
#include <EGL/eglext.h>
#if !defined EGL_KHR_stream || !defined EGL_KHR_stream_fifo || !defined EGL_KHR_stream_consumer_gltexture
# error "EGL_KHR_stream extensions are not supported!"
#endif
// Owner of the process-wide EGL display.
// The constructor and destructor are private; the only way to reach the
// display is through the static getInstance() accessor.
class EGLDisplayAccessor
{
public:
// Returns the EGL display handle, or EGL_NO_DISPLAY if initialization failed.
static EGLDisplay getInstance();
private:
// Obtains the default display and initializes EGL on it.
EGLDisplayAccessor();
// Terminates the display if it was initialized successfully.
~EGLDisplayAccessor();
// The display handle; EGL_NO_DISPLAY when unavailable.
EGLDisplay eglDisplay;
};
// X-macro listing the EGL stream extension entry points used by this project.
// Each entry is (function-pointer type, function name); the macro argument T
// is applied to every entry, so one list drives the extern declarations here
// and the definitions and lookup table in the implementation file.
#define EXTENSION_LIST_MY(T) \
T( PFNEGLCREATESTREAMKHRPROC, eglCreateStreamKHR ) \
T( PFNEGLDESTROYSTREAMKHRPROC, eglDestroyStreamKHR ) \
T( PFNEGLQUERYSTREAMKHRPROC, eglQueryStreamKHR ) \
T( PFNEGLSTREAMATTRIBKHRPROC, eglStreamAttribKHR )
// Expands one list entry into an extern declaration of the function pointer.
#define EXTLST_EXTERN(tx, x) extern tx x;
EXTENSION_LIST_MY(EXTLST_EXTERN)
// Resolves every listed entry point via eglGetProcAddress.
// Returns false if any of them cannot be resolved; must succeed before the
// function pointers declared above are called.
bool setupEGLExtensions();
#endif // EGLAPIACCESSORS_HPP
EGLAPIAccessors.cpp
#include <stdio.h>
#include "EGLAPIAccessors.hpp"
// Returns the shared EGL display handle.
// The accessor object is a function-local static, so it is constructed (and
// EGL initialized) on the first call; every call returns the same handle,
// which is EGL_NO_DISPLAY when initialization failed.
EGLDisplay EGLDisplayAccessor::getInstance()
{
static EGLDisplayAccessor instance;
return instance.eglDisplay;
}
// Obtains the default EGL display and initializes EGL on it.
//
// On success the EGL version is printed and eglDisplay holds the initialized
// display. On any failure a message is printed and eglDisplay is left as
// EGL_NO_DISPLAY. When no display can be obtained at all, the constructor
// returns immediately instead of passing EGL_NO_DISPLAY on to eglInitialize()
// and eglTerminate().
EGLDisplayAccessor::EGLDisplayAccessor()
    : eglDisplay(eglGetDisplay(EGL_DEFAULT_DISPLAY))
{
    if (eglDisplay == EGL_NO_DISPLAY) {
        printf("EGL failed to obtain display.\n");
        return;
    }

    // Initialize EGL
    EGLint major = 0;
    EGLint minor = 0;
    if (!eglInitialize(eglDisplay, &major, &minor)) {
        printf("EGL failed to initialize.\n");
        eglTerminate(eglDisplay);
        eglDisplay = EGL_NO_DISPLAY;
        return;
    }

    printf("EGL API: %d.%d\n", major, minor);
}
// Terminates the EGL display if one was successfully initialized, and logs
// the fact. stdout is flushed explicitly after the message.
EGLDisplayAccessor::~EGLDisplayAccessor()
{
    if (eglDisplay == EGL_NO_DISPLAY) {
        return;
    }

    eglTerminate(eglDisplay);
    eglDisplay = EGL_NO_DISPLAY;
    printf("Terminate EGL display.\n");
    fflush(stdout);
}
// Set to true by setupEGLExtensions() once every entry point in
// extensionList has been resolved; later calls then return immediately.
static bool initialized = false;
// Expands one list entry into the definition of its function pointer,
// initially null.
#define EXTLST_IMPL_MY(tx, x) tx x = nullptr;
EXTENSION_LIST_MY(EXTLST_IMPL_MY)
// Generic function-pointer type used to store any of the entry points.
typedef void (* extlst_fnptr_t)(void);
// Expands one list entry into a table row: the address of the function
// pointer (viewed as extlst_fnptr_t *) and the function's name as a string.
#define EXTLST_ENTRY_MY(tx, x) { ( extlst_fnptr_t *)&x, #x },
// Lookup table walked by setupEGLExtensions(): where to store each resolved
// address and which name to ask EGL for.
static struct {
extlst_fnptr_t * fnptr;
char const * name;
} extensionList[] = { EXTENSION_LIST_MY(EXTLST_ENTRY_MY) };
// Resolves every EGL extension entry point listed in extensionList via
// eglGetProcAddress and stores it in the matching function pointer.
//
// Returns true when all entry points are available (immediately so on repeat
// calls after a success). Returns false at the first entry point that cannot
// be resolved; the next call then starts over from the beginning of the list.
bool setupEGLExtensions()
{
    if (initialized) {
        return true;
    }

    for (auto &entry : extensionList) {
        *entry.fnptr = eglGetProcAddress(entry.name);
        if (*entry.fnptr == nullptr) {
            printf("Couldn't get address of %s()\n", entry.name);
            return false;
        }
    }

    initialized = true;
    return true;
}