Hi, all
Hardware: Tegra X1
System version: Linux For Tegra R24.2.1
I found that R24.2 was released with a new L4T Multimedia API. It provides a V4L2 API for encode, decode, scaling, and other media functions.
I am very interested in it, so I did some testing with the example 03_video_cuda_enc. I found that output_plane.qBuffer takes a long time (about 10 ms), while output_plane.dqBuffer takes almost no time (about 50 us). I am confused by this: shouldn't output_plane.dqBuffer be the call that waits for the buffer to be returned from the encoder?
So I added some timing debug output to the code:
diff --git a/samples/03_video_cuda_enc/video_cuda_enc_main.cpp b/samples/03_video_cuda_enc/video_cuda_enc_main.cpp
index 537ec19..d89dd01 100644
--- a/samples/03_video_cuda_enc/video_cuda_enc_main.cpp
+++ b/samples/03_video_cuda_enc/video_cuda_enc_main.cpp
@@ -33,6 +33,10 @@
#include <linux/videodev2.h>
#include <malloc.h>
#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
#include "nvbuf_utils.h"
#include "NvCudaProc.h"
@@ -45,6 +49,8 @@
using namespace std;
+struct timespec ts1, ts2, ts3, ts4, ts5;
+
static void
abort(context_t *ctx)
{
@@ -73,6 +79,9 @@ encoder_capture_plane_dq_callback(struct v4l2_buffer *v4l2_buf, NvBuffer * buffe
return false;
}
+ clock_gettime(CLOCK_MONOTONIC, &ts5);
+ printf("encoder_capture_plane_dq_callback index=%d ts5 time=%ld\n", v4l2_buf->index, ts5.tv_sec * 1000000000 + ts5.tv_nsec);
+
write_encoder_output_frame(ctx->out_file, buffer);
if (enc->capture_plane.qBuffer(*v4l2_buf, NULL) < 0)
@@ -108,6 +117,7 @@ main(int argc, char *argv[])
int ret = 0;
int error = 0;
bool eos = false;
+ int frame_count = 10;
set_defaults(&ctx);
@@ -157,7 +167,8 @@ main(int argc, char *argv[])
if (ctx.encoder_pixfmt == V4L2_PIX_FMT_H264)
{
- ret = ctx.enc->setProfile(V4L2_MPEG_VIDEO_H264_PROFILE_HIGH);
+ //ret = ctx.enc->setProfile(V4L2_MPEG_VIDEO_H264_PROFILE_HIGH);
+ ret = ctx.enc->setProfile(V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE);
}
else
{
@@ -176,12 +187,12 @@ main(int argc, char *argv[])
// Query, Export and Map the output plane buffers so that we can read
// raw data into the buffers
- ret = ctx.enc->output_plane.setupPlane(V4L2_MEMORY_MMAP, 10, true, false);
+ ret = ctx.enc->output_plane.setupPlane(V4L2_MEMORY_MMAP, 4, true, false);
TEST_ERROR(ret < 0, "Could not setup output plane", cleanup);
// Query, Export and Map the output plane buffers so that we can write
// encoded data from the buffers
- ret = ctx.enc->capture_plane.setupPlane(V4L2_MEMORY_MMAP, 10, true, false);
+ ret = ctx.enc->capture_plane.setupPlane(V4L2_MEMORY_MMAP, 4, true, false);
TEST_ERROR(ret < 0, "Could not setup capture plane", cleanup);
// output plane STREAMON
@@ -245,13 +256,18 @@ main(int argc, char *argv[])
HandleEGLImage(&ctx.eglimg);
NvDestroyEGLImage(ctx.eglDisplay, ctx.eglimg);
+ clock_gettime(CLOCK_MONOTONIC, &ts3);
+ printf("before output_plane.qBuffer index=%d ts3 time=%ld\n", v4l2_buf.index, ts3.tv_sec * 1000000000 + ts3.tv_nsec);
ret = ctx.enc->output_plane.qBuffer(v4l2_buf, NULL);
+ clock_gettime(CLOCK_MONOTONIC, &ts4);
+ printf("after output_plane.qBuffer index=%d ts4 time=%ld\n", v4l2_buf.index, ts4.tv_sec * 1000000000 + ts4.tv_nsec);
if (ret < 0)
{
cerr << "Error while queueing buffer at output plane" << endl;
abort(&ctx);
goto cleanup;
}
+ printf("\n");
if (v4l2_buf.m.planes[0].bytesused == 0)
{
@@ -273,12 +289,16 @@ main(int argc, char *argv[])
v4l2_buf.m.planes = planes;
+ clock_gettime(CLOCK_MONOTONIC, &ts1);
+ printf("before output_plane.dqBuffer index=%d ts1 time=%ld\n", v4l2_buf.index, ts1.tv_sec * 1000000000 + ts1.tv_nsec);
if (ctx.enc->output_plane.dqBuffer(v4l2_buf, &buffer, NULL, 10) < 0)
{
cerr << "ERROR while DQing buffer at output plane" << endl;
abort(&ctx);
goto cleanup;
}
+ clock_gettime(CLOCK_MONOTONIC, &ts2);
+ printf("after output_plane.dqBuffer index=%d ts2 time=%ld\n", v4l2_buf.index, ts2.tv_sec * 1000000000 + ts2.tv_nsec);
if (read_video_frame(ctx.in_file, *buffer) < 0)
{
@@ -290,13 +310,25 @@ main(int argc, char *argv[])
HandleEGLImage(&ctx.eglimg);
NvDestroyEGLImage(ctx.eglDisplay, ctx.eglimg);
+ clock_gettime(CLOCK_MONOTONIC, &ts3);
+ printf("before output_plane.qBuffer index=%d ts3 time=%ld\n", v4l2_buf.index, ts3.tv_sec * 1000000000 + ts3.tv_nsec);
ret = ctx.enc->output_plane.qBuffer(v4l2_buf, NULL);
+ clock_gettime(CLOCK_MONOTONIC, &ts4);
+ printf("after output_plane.qBuffer index=%d ts4 time=%ld\n", v4l2_buf.index, ts4.tv_sec * 1000000000 + ts4.tv_nsec);
if (ret < 0)
{
cerr << "Error while queueing buffer at output plane" << endl;
abort(&ctx);
goto cleanup;
}
+ printf("\n");
+
+ frame_count--;
+ if (frame_count == 0) {
+ cout << "sleep and exit" << endl;
+ usleep(2000000);
+ break;
+ }
if (v4l2_buf.m.planes[0].bytesused == 0)
{
diff --git a/samples/common/classes/NvUtils.cpp b/samples/common/classes/NvUtils.cpp
index 9a994a2..c36111d 100644
--- a/samples/common/classes/NvUtils.cpp
+++ b/samples/common/classes/NvUtils.cpp
@@ -46,9 +46,11 @@ read_video_frame(std::ifstream * stream, NvBuffer & buffer)
plane.bytesused = 0;
for (j = 0; j < plane.fmt.height; j++)
{
+/*
stream->read(data, bytes_to_read);
if (stream->gcount() < bytes_to_read)
return -1;
+*/
data += plane.fmt.stride;
}
plane.bytesused = plane.fmt.stride * plane.fmt.height;
Then, the output:
Failed to query video capabilities: Bad address
NvMMLiteOpen : Block : BlockType = 4
===== MSENC =====
NvMMLiteBlockCreate : Block : BlockType = 4
875967048
842091865
before output_plane.qBuffer index=0 ts3 time=20736071424891 // output_plane.qBuffer frame 0 start
===== MSENC blits (mode: 1) into tiled surfaces =====
encoder_capture_plane_dq_callback index=0 ts5 time=20736081566684 // encoded frame 0(SPS+PPS+IDR_SLICE) output, encoder take 10141793(ts5 - ts3) ns
after output_plane.qBuffer index=0 ts4 time=20736081663761 // output_plane.qBuffer frame 0 end, qBuffer take 10238870(ts4 - ts3) ns
before output_plane.qBuffer index=1 ts3 time=20736082638276 // output_plane.qBuffer frame 1 start
after output_plane.qBuffer index=1 ts4 time=20736091488806 // output_plane.qBuffer frame 1 end, qBuffer take 8850530(ts4 - ts3) ns
encoder_capture_plane_dq_callback index=1 ts5 time=20736091697698 // encoded frame 1(SLICE) output, encoder take 9059422(ts5 - ts3) ns
before output_plane.qBuffer index=2 ts3 time=20736092426032 // output_plane.qBuffer frame 2 start
encoder_capture_plane_dq_callback index=2 ts5 time=20736101379003 // encoded frame 2(SLICE) output, encoder take 8952971(ts5 - ts3) ns
after output_plane.qBuffer index=2 ts4 time=20736101417854 // output_plane.qBuffer frame 2 end, qBuffer take 8991822(ts4 - ts3) ns
before output_plane.qBuffer index=3 ts3 time=20736102331905
encoder_capture_plane_dq_callback index=3 ts5 time=20736111248889
after output_plane.qBuffer index=3 ts4 time=20736111286178
before output_plane.qBuffer index=4 ts3 time=20736112202885
encoder_capture_plane_dq_callback index=4 ts5 time=20736120931808
after output_plane.qBuffer index=4 ts4 time=20736120968108
before output_plane.qBuffer index=5 ts3 time=20736121883513
encoder_capture_plane_dq_callback index=5 ts5 time=20736130510724
after output_plane.qBuffer index=5 ts4 time=20736130547545
before output_plane.qBuffer index=6 ts3 time=20736131477584
after output_plane.qBuffer index=6 ts4 time=20736140275826
encoder_capture_plane_dq_callback index=6 ts5 time=20736140400765
before output_plane.qBuffer index=7 ts3 time=20736141196960
encoder_capture_plane_dq_callback index=7 ts5 time=20736149853388
after output_plane.qBuffer index=7 ts4 time=20736149890104
before output_plane.qBuffer index=8 ts3 time=20736150908263
encoder_capture_plane_dq_callback index=8 ts5 time=20736159592189
after output_plane.qBuffer index=8 ts4 time=20736159629895
before output_plane.qBuffer index=9 ts3 time=20736160603004
encoder_capture_plane_dq_callback index=9 ts5 time=20736169510249
after output_plane.qBuffer index=9 ts4 time=20736169544621
before output_plane.dqBuffer index=0 ts1 time=20736169575505 // output_plane.dqBuffer frame 0 start
after output_plane.dqBuffer index=0 ts2 time=20736169611179 // output_plane.dqBuffer frame 0 end, dqBuffer take 35674(ts2 - ts1) ns
before output_plane.qBuffer index=0 ts3 time=20736170246552
encoder_capture_plane_dq_callback index=0 ts5 time=20736178912875
after output_plane.qBuffer index=0 ts4 time=20736178943186
before output_plane.dqBuffer index=0 ts1 time=20736178984693
after output_plane.dqBuffer index=1 ts2 time=20736179003806
before output_plane.qBuffer index=1 ts3 time=20736179639752
encoder_capture_plane_dq_callback index=1 ts5 time=20736188261599
after output_plane.qBuffer index=1 ts4 time=20736188291806
before output_plane.dqBuffer index=0 ts1 time=20736188332219
after output_plane.dqBuffer index=2 ts2 time=20736188351645
before output_plane.qBuffer index=2 ts3 time=20736188944885
encoder_capture_plane_dq_callback index=2 ts5 time=20736197578867
after output_plane.qBuffer index=2 ts4 time=20736197608136
before output_plane.dqBuffer index=0 ts1 time=20736197639853
after output_plane.dqBuffer index=3 ts2 time=20736197665267
before output_plane.qBuffer index=3 ts3 time=20736198262257
encoder_capture_plane_dq_callback index=3 ts5 time=20736206880615
after output_plane.qBuffer index=3 ts4 time=20736206909415
before output_plane.dqBuffer index=0 ts1 time=20736206949881
after output_plane.dqBuffer index=4 ts2 time=20736206975921
before output_plane.qBuffer index=4 ts3 time=20736207558381
encoder_capture_plane_dq_callback index=4 ts5 time=20736216185697
after output_plane.qBuffer index=4 ts4 time=20736216214965
before output_plane.dqBuffer index=0 ts1 time=20736216246265
after output_plane.dqBuffer index=5 ts2 time=20736216271576
before output_plane.qBuffer index=5 ts3 time=20736216948769
encoder_capture_plane_dq_callback index=5 ts5 time=20736225559419
after output_plane.qBuffer index=5 ts4 time=20736225589261
before output_plane.dqBuffer index=0 ts1 time=20736225620717
after output_plane.dqBuffer index=6 ts2 time=20736225646757
before output_plane.qBuffer index=6 ts3 time=20736226286452
after output_plane.qBuffer index=6 ts4 time=20736234854866
before output_plane.dqBuffer index=0 ts1 time=20736234900123
after output_plane.dqBuffer index=7 ts2 time=20736234923194
encoder_capture_plane_dq_callback index=6 ts5 time=20736235002980
before output_plane.qBuffer index=7 ts3 time=20736235495550
encoder_capture_plane_dq_callback index=7 ts5 time=20736244094483
after output_plane.qBuffer index=7 ts4 time=20736244172602
before output_plane.dqBuffer index=0 ts1 time=20736244214214
after output_plane.dqBuffer index=8 ts2 time=20736244233327
before output_plane.qBuffer index=8 ts3 time=20736244862762
encoder_capture_plane_dq_callback index=8 ts5 time=20736253500546
after output_plane.qBuffer index=8 ts4 time=20736253531690
before output_plane.dqBuffer index=0 ts1 time=20736253563980
after output_plane.dqBuffer index=9 ts2 time=20736253588874
before output_plane.qBuffer index=9 ts3 time=20736254303616
encoder_capture_plane_dq_callback index=9 ts5 time=20736262929682
after output_plane.qBuffer index=9 ts4 time=20736262959211
sleep and exit
App run was successful
Is this a bug?
Or
does the VideoEncoder’s qBuffer work in synchronous mode, waiting until encoding of the frame has completed?
If so, I am afraid the buffer pool serves no purpose.