nvargus-daemon freeze/hang on pipeline stop on R32.1

My two-camera pipeline works fine when recording starts. However, when I stop it (by manually sending an EOS event and then handling the resulting EOS message on the bus), I see this:

2019-08-29 10:17:40,305 DEBUG: Caught EOS event...
GST_ARGUS: Cleaning up
GST_ARGUS: 
PowerServiceHwVic::cleanupResources
CONSUMER: Done Success
GST_ARGUS: Done Success
GST_ARGUS: Cleaning up

The log message with the timestamp is my own application debug logging output. Here are the exact lines:

logger.debug("Caught EOS event...")
self.recorder_pipeline.set_state(Gst.State.NULL)
logger.debug("Set pipeline to NULL state...")

The last few debug lines from GStreamer are:

0:00:15.035895991 ^[[336m17765^[[00m     0x27863680 ^[[37mDEBUG  ^[[00m ^[[00;01m                 bin gstbin.c:4054:gst_bin_handle_message_func:<pipeline0>^[[00m posting message upward
0:00:15.035952119 ^[[336m17765^[[00m     0x27863680 ^[[37mDEBUG  ^[[00m ^[[00;43m             GST_BUS gstbus.c:318:gst_bus_post:<bus2>^[[00m [msg 0x27981400] posting on bus stream-status message: 0x27981400, time 99:99:99.999999999, seq-num 280, element 'src', GstMessageStreamStatus, type=(GstStreamStatusType)GST_STREAM_STATUS_TYPE_LEAVE, owner=(GstElement)"\(GstNvArgusCameraSrc\)\ nvcam0", object=(GstTask)"\(GstTask\)\ nvcam0:src";
0:00:15.036012310 ^[[336m17765^[[00m     0x27863680 ^[[37mDEBUG  ^[[00m ^[[00;43m             GST_BUS gstbus.c:357:gst_bus_post:<bus2>^[[00m [msg 0x27981400] pushing on async queue
0:00:15.036046678 ^[[336m17765^[[00m     0x27863680 ^[[37mDEBUG  ^[[00m ^[[00;43m             GST_BUS gstbus.c:360:gst_bus_post:<bus2>^[[00m [msg 0x27981400] pushed on async queue
0:00:15.036080278 ^[[336m17765^[[00m     0x27863680 ^[[37mDEBUG  ^[[00m ^[[00;43m             GST_BUS gstbus.c:353:gst_bus_post:<bus1>^[[00m [msg 0x27981400] dropped
0:00:15.036113110 ^[[336m17765^[[00m     0x27863680 ^[[37mDEBUG  ^[[00m ^[[00;43m             GST_BUS gstbus.c:353:gst_bus_post:<bus0>^[[00m [msg 0x27981400] dropped
0:00:15.036149749 ^[[336m17765^[[00m     0x27863680 ^[[37mDEBUG  ^[[00m ^[[00m                task gsttask.c:359:gst_task_func:^[[00m Exit task 0x278884d0, thread 0x27863680
0:00:15.036248564 ^[[336m17765^[[00m     0x27284930 ^[[37mDEBUG  ^[[00m ^[[00m                task gsttask.c:815:gst_task_join:<nvcam0:src>^[[00m Joining task 0x278884d0, thread 0x27284930
0:00:15.036319540 ^[[336m17765^[[00m     0x27284930 ^[[37mDEBUG  ^[[00m ^[[00m                task gsttask.c:845:gst_task_join:<nvcam0:src>^[[00m Joined task 0x278884d0
0:00:15.036346292 ^[[336m17765^[[00m     0x27284930 ^[[37mDEBUG  ^[[00m ^[[00m             basesrc gstbasesrc.c:3679:gst_base_src_set_flushing:<nvcam0>^[[00m flushing 0

strace shows my process is waiting on a futex indefinitely:

futex(0x4714538, FUTEX_WAIT_PRIVATE, 5, NULL) = 0
futex(0x4628200, FUTEX_WAKE_PRIVATE, 1) = 0
futex(0x46280c8, FUTEX_WAKE_PRIVATE, 1) = 0
sendto(37, "\t\0\0\0\266\0\0\0", 8, MSG_NOSIGNAL, NULL, 0) = 8
sendto(37, "\10K\20\360\320\221\314\371\17", 9, MSG_NOSIGNAL, NULL, 0) = 9
futex(0x4724528, FUTEX_WAIT_PRIVATE, 0, {tv_sec=59, tv_nsec=999998144}) = 0
futex(0x7f40001190, FUTEX_WAKE_PRIVATE, 1) = 0
sendto(37, "\t\0\0\0\267\0\0\0", 8, MSG_NOSIGNAL, NULL, 0) = 8
sendto(37, "\10U\20\360\320\221\314\371\17", 9, MSG_NOSIGNAL, NULL, 0) = 9
futex(0x4724528, FUTEX_WAIT_PRIVATE, 0, {tv_sec=59, tv_nsec=999999488}) = -1 EAGAIN (Resource temporarily unavailable)
futex(0x7f40001190, FUTEX_WAKE_PRIVATE, 1) = 0
sendto(37, "\27\0\0\0\270\0\0\0", 8, MSG_NOSIGNAL, NULL, 0) = 8
sendto(37, "\10V\20\360\320\221\314\371\17\2722\v\20\377\377\377\377\377\377\377\377\377\1", 23, MSG_NOSIGNAL, NULL, 0) = 23
futex(0x47fd868, FUTEX_WAIT_PRIVATE, 0, {tv_sec=59, tv_nsec=999999456}) = 0
futex(0x7f40001190, FUTEX_WAKE_PRIVATE, 1) = 0
futex(0x4628458, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x4628448, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x7f677fe2c0, FUTEX_WAIT, 17962, NULL

This worked fine on R28.2.1 but now hangs on set_state() indefinitely and my application goes gray waiting on this call to come back.

This seems like a regression. The main change in my pipeline was moving from nvcamerasrc to nvarguscamerasrc. That’s about it.

What am I doing wrong?

UPDATE: If I run this same pipeline using gst-launch-1.0 and let it record for a while and hit Ctrl-C I can also get it to hang on a futex. This definitely seems like breakage to me.

gst-launch-1.0 matroskamux name=muxer streamable=true ! multifilesink name=mfsink post-messages=true next-file=5 max-file-duration=60000000000 location=capture_%08d.mkv nvarguscamerasrc do-timestamp=true awblock=true aelock=true sensor-id=0 name=nvcam0 ! video/x-raw(memory:NVMM), width=(int)4104, height=(int)3046, framerate=(fraction)30/1 ! nvvidconv flip-method=2 ! video/x-raw(memory:NVMM), width=(int)3840, height=(int)2160, format=(string)I420 ! omxh264enc control-rate=2 profile=8 bitrate=30000000 ! video/x-h264, stream-format=(string)byte-stream ! h264parse ! identity name=tap0 ! muxer.video_0 nvarguscamerasrc do-timestamp=true awblock=true aelock=true sensor-id=1 name=nvcam1 ! video/x-raw(memory:NVMM), width=(int)4104, height=(int)3046, framerate=(fraction)30/1 ! nvvidconv flip-method=2 ! video/x-raw(memory:NVMM), width=(int)3840, height=(int)2160, format=(string)I420 ! omxh264enc control-rate=2 profile=8 bitrate=30000000 ! video/x-h264, stream-format=(string)byte-stream ! h264parse ! identity name=tap1 ! muxer.video_1
Setting pipeline to PAUSED ...
Pipeline is live and does not need PREROLL ...
Framerate set to : 30 at NvxVideoEncoderSetParameterFramerate set to : 30 at NvxVideoEncoderSetParameterNvMMLiteOpen : Block : BlockType = 4 
NvMMLiteOpen : Block : BlockType = 4 
===== NVMEDIA: NVENC =====
===== NVMEDIA: NVENC =====
NvMMLiteBlockCreate : Block : BlockType = 4 
NvMMLiteBlockCreate : Block : BlockType = 4 
Setting pipeline to PLAYING ...
H264: Profile = 100, Level = 40 
H264: Profile = 100, Level = 40 
New clock: GstSystemClock
GST_ARGUS: Creating output stream
GST_ARGUS: Creating output stream
CONSUMER: Waiting until producer is connected...
GST_ARGUS: Available Sensor modes :
GST_ARGUS: 4000 x 3000 FR = 29.999999 fps Duration = 33333334 ; Analog Gain range min 1.000000, max 251.188705; Exposure Range min 52000, max 660000000;

CONSUMER: Waiting until producer is connected...
GST_ARGUS: Running with following settings:
   Camera index = 1 
   Camera mode  = 0 
   Output Stream W = 4000 H = 3000 
   seconds to Run    = 0 
   Frame Rate = 29.999999 
GST_ARGUS: PowerService: requested_clock_Hz=164072784
GST_ARGUS: Setup Complete, Starting captures for 0 seconds
GST_ARGUS: Starting repeat capture requests.
GST_ARGUS: Available Sensor modes :
GST_ARGUS: 4000 x 3000 FR = 29.999999 fps Duration = 33333334 ; Analog Gain range min 1.000000, max 251.188705; Exposure Range min 52000, max 660000000;

GST_ARGUS: Running with following settings:
   Camera index = 0 
   Camera mode  = 0 
   Output Stream W = 4000 H = 3000 
   seconds to Run    = 0 
   Frame Rate = 29.999999 
GST_ARGUS: Setup Complete, Starting captures for 0 seconds
GST_ARGUS: Starting repeat capture requests.
CONSUMER: Producer has connected; continuing.
CONSUMER: Producer has connected; continuing.
^Chandling interrupt.
Interrupt: Stopping pipeline ...
Execution ended after 0:01:32.894848900
Setting pipeline to PAUSED ...
Setting pipeline to READY ...
GST_ARGUS: Cleaning up
GST_ARGUS: 
PowerServiceHwVic::cleanupResources

hello alex.sack,

could you please narrow down the issue,
for example, please exclude nvvidconv and also encoder to check the use-case is workable.
you may use the fakesink to replace nvvidconv and also omxh264enc to verify the pipeline.
thereafter, you might check which component caused the failure.
thanks

0:00:09.232708735  5086     0x27c9ead0 DEBUG                GST_BUS gstbus.c:318:gst_bus_post:<bus0> [msg 0x7f88002e80] posting on bus stream-status message: 0x7f88002e80, time 99:99:99.999999999, seq-num 90, element 'src', GstMessageStreamStatus, type=(GstStreamStatusType)GST_STREAM_STATUS_TYPE_LEAVE, owner=(GstElement)"\(GstNvArgusCameraSrc\)\ nvcam0", object=(GstTask)"\(GstTask\)\ nvcam0:src";
0:00:09.232732127  5086     0x27c9ead0 DEBUG                    bin gstbin.c:3717:gst_bin_handle_message_func:<bin0> [msg 0x7f88002e80] handling child src message of type stream-status
0:00:09.232747135  5086     0x27c9ead0 DEBUG                    bin gstbin.c:4054:gst_bin_handle_message_func:<bin0> posting message upward
0:00:09.232780415  5086     0x27c9ead0 DEBUG                GST_BUS gstbus.c:318:gst_bus_post:<bus1> [msg 0x7f88002e80] posting on bus stream-status message: 0x7f88002e80, time 99:99:99.999999999, seq-num 90, element 'src', GstMessageStreamStatus, type=(GstStreamStatusType)GST_STREAM_STATUS_TYPE_LEAVE, owner=(GstElement)"\(GstNvArgusCameraSrc\)\ nvcam0", object=(GstTask)"\(GstTask\)\ nvcam0:src";
0:00:09.232801919  5086     0x27c9ead0 DEBUG                    bin gstbin.c:3717:gst_bin_handle_message_func:<pipeline0> [msg 0x7f88002e80] handling child src message of type stream-status
0:00:09.232816287  5086     0x27c9ead0 DEBUG                    bin gstbin.c:4054:gst_bin_handle_message_func:<pipeline0> posting message upward
0:00:09.232848159  5086     0x27c9ead0 DEBUG                GST_BUS gstbus.c:318:gst_bus_post:<bus2> [msg 0x7f88002e80] posting on bus stream-status message: 0x7f88002e80, time 99:99:99.999999999, seq-num 90, element 'src', GstMessageStreamStatus, type=(GstStreamStatusType)GST_STREAM_STATUS_TYPE_LEAVE, owner=(GstElement)"\(GstNvArgusCameraSrc\)\ nvcam0", object=(GstTask)"\(GstTask\)\ nvcam0:src";
0:00:09.232867455  5086     0x27c9ead0 DEBUG                GST_BUS gstbus.c:357:gst_bus_post:<bus2> [msg 0x7f88002e80] pushing on async queue
0:00:09.232883679  5086     0x27c9ead0 DEBUG                GST_BUS gstbus.c:360:gst_bus_post:<bus2> [msg 0x7f88002e80] pushed on async queue
0:00:09.232900127  5086     0x27c9ead0 DEBUG                GST_BUS gstbus.c:353:gst_bus_post:<bus1> [msg 0x7f88002e80] dropped
0:00:09.232915871  5086     0x27c9ead0 DEBUG                GST_BUS gstbus.c:353:gst_bus_post:<bus0> [msg 0x7f88002e80] dropped
0:00:09.232935775  5086     0x27c9ead0 DEBUG                   task gsttask.c:359:gst_task_func: Exit task 0x27e4c170, thread 0x27c9ead0
0:00:09.232939871  5086     0x27787530 DEBUG                   task gsttask.c:845:gst_task_join:<nvcam0:src> Joined task 0x27e4c170
0:00:09.232971775  5086     0x27787530 DEBUG                basesrc gstbasesrc.c:3679:gst_base_src_set_flushing:<nvcam0> flushing 0

Here is now literally the pipeline:

nvarguscamerasrc do-timestamp=true awblock=true aelock=true sensor-id=0 name=nvcam0
! video/x-raw(memory:NVMM), width=(int)4000, height=(int)3000, framerate=(fraction)30/1 ! fakesink
nvarguscamerasrc do-timestamp=true awblock=true aelock=true sensor-id=1 name=nvcam1
! video/x-raw(memory:NVMM), width=(int)4000, height=(int)3000, framerate=(fraction)30/1 ! fakesink

This simply hangs sometimes when I set the state to NULL and set my pipeline variable to None. If I recreate it, then nvargus-daemon segfaults. See GST_DEBUG output above.

gst-launch-1.0 seems to work but I kinda smell a timing bug given setting the STATE sometimes works (SEGVs if I create a new pipeline with the same spec) and sometimes just hangs.

To give you the sequence of events:

STOP button pressed → self.recorder_pipeline.send_event(Gst.Event.new_eos())
Bus message handler called, message.type == Gst.MessageType.EOS → call on_eos() function
on_eos → self.recorder_pipeline.set_state(Gst.State.NULL) → HANG

The set_state() never comes back and from the debug messages it kinda looks like it never gets delivered to all of the elements in the pipeline???

More logs from nvargus-daemon when the function does sometimes come back but on restart SEGVs the daemon:

Aug 30 09:48:08 tx2 nvargus-daemon[5033]: message repeated 2 times: [ CAM: serial no file already exists, skips storing againLSC: LSC surface is not based on full res!]
Aug 30 09:48:09 tx2 systemd[5898]: Starting Notification regarding a crash report...
Aug 30 09:48:09 tx2 nvargus-daemon[5033]: (Argus) Error FileOperationFailed: Failed socket read: Connection reset by peer (in src/rpc/socket/common/SocketUtils.cpp, function readSocket(), line 79)
Aug 30 09:48:09 tx2 nvargus-daemon[5033]: (Argus) Error FileOperationFailed:  (propagating from libs/rpc_socket_server/ServerSocketManager.cpp, function recvThreadCore(), line 185)
Aug 30 09:48:09 tx2 nvargus-daemon[5033]: (Argus) Error FileOperationFailed:  (propagating from libs/rpc_socket_server/ServerSocketManager.cpp, function run(), line 58)
Aug 30 09:48:10 tx2 update-notifier-crash[5774]: python3
Aug 30 09:48:10 tx2 update-notifier-crash[5774]: nvargus-daemon
Aug 30 09:48:11 tx2 nvargus-daemon[5033]: === python3[5618]: Connection closed (7FA31F81D0)=== python3[5618]: There were pending client requests in the server at the time of CameraProvider destruction, and they failed to complete within a reasonable time (1500 ms). This may be due to a hardware or libargus error. Forced destruction will now proceed, which may leave the libargus server in a bad state.=== python3[5618]: WARNING: CameraProvider was not destroyed before client connection terminated.=== python3[5618]:          The client may have abnormally terminated. Destroying CameraProvider...=== python3[5618]: CameraProvider destroyed (0x7f9d8205c0)=== python3[5618]: WARNING: Cleaning up 2 outstanding requests...=== python3[5618]: WARNING: Cleaning up 2 outstanding streams...(Argus) Error FileOperationFailed: Socket write failed: Broken pipe (in src/rpc/socket/common/SocketUtils.cpp, function writeSocket(), line 49)
Aug 30 09:48:11 tx2 nvargus-daemon[5033]: (Argus) Error FileOperationFailed:  (propagating from libs/rpc_socket_server/ServerSocketManager.cpp, function sendResponse(), line 92)
Aug 30 09:48:11 tx2 nvargus-daemon[5033]: (Argus) Error FileOperationFailed:  (propagating from libs/rpc_socket_server/ServerSocketManager.cpp, function handleRequest(), line 167)
Aug 30 09:48:11 tx2 nvargus-daemon[5033]: (Argus) Error FileOperationFailed: Message processing failed (id = 222) (in libs/rpc_socket_server/ServerWorkerThread.cpp, function run(), line 143)
Aug 30 09:48:11 tx2 nvargus-daemon[5033]: === python3[5618]: WARNING: Cleaning up 2 outstanding stream settings...=== python3[5618]: WARNING: Cleaning up 2 outstanding sessions...(Argus) Error FileOperationFailed: Socket write failed: Broken pipe (in src/rpc/socket/common/SocketUtils.cpp, function writeSocket(), line 49)
Aug 30 09:48:11 tx2 nvargus-daemon[5033]: (Argus) Error FileOperationFailed:  (propagating from libs/rpc_socket_server/ServerSocketManager.cpp, function sendResponse(), line 92)
Aug 30 09:48:11 tx2 nvargus-daemon[5033]: (Argus) Error FileOperationFailed:  (propagating from libs/rpc_socket_server/ServerSocketManager.cpp, function handleRequest(), line 167)
Aug 30 09:48:11 tx2 nvargus-daemon[5033]: (Argus) Error FileOperationFailed: Message processing failed (id = 236) (in libs/rpc_socket_server/ServerWorkerThread.cpp, function run(), line 143)
Aug 30 09:48:11 tx2 nvargus-daemon[5033]: PowerServiceCore:handleRequests: timePassed = 25536
Aug 30 09:48:11 tx2 system-crash-no[5781]: GtkDialog mapped without a transient parent. This is discouraged.
Aug 30 09:48:14 tx2 systemd[5898]: update-notifier-crash.service: Main process exited, code=exited, status=1/FAILURE
Aug 30 09:48:14 tx2 systemd[5898]: update-notifier-crash.service: Failed with result 'exit-code'.
Aug 30 09:48:14 tx2 systemd[5898]: Failed to start Notification regarding a crash report.

To recap:

With that simple pipeline above under a Python3 script, if I set the pipeline to the NULL state it either:

a) Hangs for eternity

b) Goes through but on restart nvargus-daemon and my application crash <— certainly an indication of some kind of bad state the daemon got into.

Again, all of this worked fine on R28.2.1 (and still does using nvcamerasrc).

Just to reiterate why I think this might be nvarguscamerasrc and not my app is this:

0:00:09.697787650  7497     0x33e0a730 DEBUG       GST_ELEMENT_PADS gstelement.c:1856:gst_element_send_event: send eos event on element nvcam0
0:00:09.697812738  7497     0x33e0a730 DEBUG                basesrc gstbasesrc.c:1786:gst_base_src_send_event:<nvcam0> handling event 0x3445b830 eos event: 0x3445b830, time 99:99:99.999999999, seq-num 53, (NULL)
0:00:09.697833474  7497     0x33e0a730 DEBUG                basesrc gstbasesrc.c:3679:gst_base_src_set_flushing:<nvcam0> flushing 1
0:00:09.773695249  7497   0x7f640052d0 DEBUG             GST_MEMORY gstmemory.c:139:gst_memory_init: new memory 0x7f50001840, maxsize:976 offset:0 size:976
0:00:09.808484108  7497   0x7f640052d0 DEBUG             GST_MEMORY gstmemory.c:139:gst_memory_init: new memory 0x7f500018c0, maxsize:976 offset:0 size:976
0:00:09.835376335  7497   0x7f640052d0 DEBUG             bufferpool gstbufferpool.c:304:do_alloc_buffer:<bufferpool0> max buffers reached
0:00:09.835459215  7497   0x7f640052d0 DEBUG               GST_POLL gstpoll.c:1317:gst_poll_wait: 0x7f64005280: timeout :99:99:99.999999999

The last message is an EOS event to nvarguscamerasrc (nvcam0), where it looks like it is flushing its internal buffers and then just hangs in gst_poll_wait().
:
The question is why?

More details:

It seems:

0:00:06.772111097  7969     0x30912330 DEBUG       GST_ELEMENT_PADS gstelement.c:1856:gst_element_send_event: send eos event on element nvcam0
0:00:06.772133721  7969     0x30912330 DEBUG                basesrc gstbasesrc.c:1786:gst_base_src_send_event:<nvcam0> handling event 0x30ecf830 eos event: 0x30ecf830, time 99:99:99.999999999, seq-num 33, (NULL)
0:00:06.772151225  7969     0x30912330 DEBUG                basesrc gstbasesrc.c:3679:gst_base_src_set_flushing:<nvcam0> flushing 1

Causes gstreamer to call on:

/* flush all */
  gst_base_src_set_flushing (basesrc, TRUE);

Where basesrc is nvarguscamerasrc. Can you please talk to the developer about this? It is a real show stopper on R32.1 (and this is the latest version I can try since the Leopard Imaging camera is not supported on R32.2 yet).

More updates:

I can easily reproduce the nvargus-daemon crash by running:

gst-launch-1.0 matroskamux name=muxer streamable=true ! multifilesink name=mfsink post-messages=true next-file=5 max-file-duration=60000000000 location=capture_%08d.mkv \
nvarguscamerasrc maxperf=true do-timestamp=true awblock=true aelock=true sensor-id=0 name=nvcam0 \
! 'video/x-raw(memory:NVMM), width=(int)4000, height=(int)3000, framerate=(fraction)30/1' \
! nvtee ! nvvidconv flip-method=2 ! 'video/x-raw(memory:NVMM), width=(int)4000, height=(int)3000, format=(string)I420' \
! omxh265enc control-rate=2 bitrate=10000000 ! 'video/x-h265, stream-format=(string)byte-stream' ! h265parse ! queue ! muxer.video_0 \
nvarguscamerasrc maxperf=true do-timestamp=true awblock=true aelock=true sensor-id=1 name=nvcam1 \
! 'video/x-raw(memory:NVMM), width=(int)4000, height=(int)3000, framerate=(fraction)30/1' \
! nvtee ! nvvidconv flip-method=2 ! 'video/x-raw(memory:NVMM), width=(int)4000, height=(int)3000, format=(string)I420' \
! omxh265enc control-rate=2 bitrate=10000000 ! 'video/x-h265, stream-format=(string)byte-stream' ! h265parse ! queue ! muxer.video_1

This is with the Leopard Imaging IMX-377 camera and latest driver. The daemon crashes if I run this pipeline, hit CTRL-C, run this pipeline again (you have to do it several times since it appears to be a timing bug).

After it crashes, you need to restart the nvargus-daemon using systemctl and you can record again. Try it on R28.2.1 and everything just works (but substitute nvarguscamerasrc with nvcamerasrc).

Pretty sure this is a bug in nvarguscamerasrc and/or the daemon.

@JerryChang: Please advise. This is a real showstopper.

If I try to emulate gst-launch-1.0’s behavior which is to set the pipeline to PAUSED, then READY, before NULL I see this:

3279 Sep  1 11:20:09 tx2 nvargus-daemon[17689]: === gst-launch-1.0[25363]: CameraProvider destroyed (0x7fa1820460)=== gst-launch-1.0[25363]: Connection closed (7FA4D581D0)=== gst-launch-1.0[25363]: Connection cleaned up (7FA4D581D0)=== python3[25490]: Connection established (7FA4D581D0)NvPclHwGetModuleList: WARNING: Could not map module to ISP config string
  3280 Sep  1 11:20:09 tx2 nvargus-daemon[17689]: NvPclHwGetModuleList: No module data found
  3281 Sep  1 11:20:09 tx2 nvargus-daemon[17689]: NvPclHwGetModuleList: WARNING: Could not map module to ISP config string
  3282 Sep  1 11:20:09 tx2 nvargus-daemon[17689]: NvPclHwGetModuleList: No module data found
  3283 Sep  1 11:20:09 tx2 nvargus-daemon[17689]: NvPclHwGetModuleList: WARNING: Could not map module to ISP config string
  3284 Sep  1 11:20:09 tx2 nvargus-daemon[17689]: NvPclHwGetModuleList: No module data found
  3285 Sep  1 11:20:09 tx2 nvargus-daemon[17689]: OFParserGetVirtualDevice: NVIDIA Camera virtual enumerator not found in proc device-tree
  3286 Sep  1 11:20:09 tx2 nvargus-daemon[17689]: ---- imager: Found override file [/var/nvidia/nvcam/settings/camera_overrides.isp]. ----
  3287 Sep  1 11:20:09 tx2 nvargus-daemon[17689]: CAM: serial no file already exists, skips storing againLSC: LSC surface is not based on full res!
  3288 Sep  1 11:20:09 tx2 nvargus-daemon[17689]: ---- imager: Found override file [/var/nvidia/nvcam/settings/camera_overrides.isp]. ----
  3289 Sep  1 11:20:09 tx2 nvargus-daemon[17689]: CAM: serial no file already exists, skips storing againLSC: LSC surface is not based on full res!
  3290 Sep  1 11:20:09 tx2 nvargus-daemon[17689]: ---- imager: Found override file [/var/nvidia/nvcam/settings/camera_overrides.isp]. ----
  3291 Sep  1 11:20:10 tx2 nvargus-daemon[17689]: CAM: serial no file already exists, skips storing againLSC: LSC surface is not based on full res!
  3292 Sep  1 11:20:10 tx2 nvargus-daemon[17689]: === python3[25490]: CameraProvider initialized (0x7fa1c46a40)CAM: serial no file already exists, skips storing againLSC: LSC surface is not based on full res!
  3293 Sep  1 11:20:10 tx2 nvargus-daemon[17689]: CAM: serial no file already exists, skips storing againLSC: LSC surface is not based on full res!
  3294 Sep  1 11:20:59 tx2 nvargus-daemon[17689]: SCF: Error Timeout:  (propagating from src/services/gl/EGLStreamProducer.cpp, function getBuffer(), line 349)
  3295 Sep  1 11:20:59 tx2 nvargus-daemon[17689]: SCF: Error Timeout:  (propagating from src/components/CaptureContainerImpl.cpp, function assignAllBuffersFromStream(), line 230)
  3296 Sep  1 11:20:59 tx2 nvargus-daemon[17689]: SCF: Error Timeout:  (propagating from src/components/stages/CCDataSetupStage.cpp, function doHandleRequest(), line 68)
  3297 Sep  1 11:20:59 tx2 nvargus-daemon[17689]: SCF: Error Timeout:  (propagating from src/components/stages/OrderedStage.cpp, function doExecute(), line 158)
  3298 Sep  1 11:20:59 tx2 nvargus-daemon[17689]: SCF: Error Timeout: Sending critical error event (in src/api/Session.cpp, function sendErrorEvent(), line 990)
  3299 Sep  1 11:20:59 tx2 nvargus-daemon[17689]: SCF: Error InvalidState: Session has suffered a critical failure (in src/api/Session.cpp, function capture(), line 667)
  3300 Sep  1 11:20:59 tx2 nvargus-daemon[17689]: (Argus) Error InvalidState:  (propagating from src/api/ScfCaptureThread.cpp, function run(), line 109)
  3301 Sep  1 11:20:59 tx2 nvargus-daemon[17689]: SCF: Error InvalidState: Session has suffered a critical failure (in src/api/Session.cpp, function capture(), line 667)
  3302 Sep  1 11:20:59 tx2 nvargus-daemon[17689]: (Argus) Error InvalidState:  (propagating from src/api/ScfCaptureThread.cpp, function run(), line 109)
  3303 Sep  1 11:20:59 tx2 nvargus-daemon[17689]: SCF: Error InvalidState: Session has suffered a critical failure (in src/api/Session.cpp, function capture(), line 667)
  3304 Sep  1 11:20:59 tx2 nvargus-daemon[17689]: (Argus) Error InvalidState:  (propagating from src/api/ScfCaptureThread.cpp, function run(), line 109)

hello alex.sack,

The nvargus-daemon service expects the camera device to keep streaming sensor output continuously.
Since you used Ctrl-C to terminate the service, there are still several buffers in the queue, so some queue-related failures are expected to be reported,
for example,

... buffers still pending during EGLStreamProducer destruction...

however,
your log messages report timeout failures. Is your camera sensor powered off as soon as the Ctrl-C command is received?

3293 Sep  1 11:20:10 tx2 nvargus-daemon[17689]: CAM: serial no file already exists, skips storing againLSC: LSC surface is not based on full res!
.....
  3294 Sep  1 11:20:59 tx2 nvargus-daemon[17689]: SCF: Error Timeout:  (propagating from src/services/gl/EGLStreamProducer.cpp, function getBuffer(), line 349)
  3295 Sep  1 11:20:59 tx2 nvargus-daemon[17689]: SCF: Error Timeout:  (propagating from src/components/CaptureContainerImpl.cpp, function assignAllBuffersFromStream(), line 230)
  3296 Sep  1 11:20:59 tx2 nvargus-daemon[17689]: SCF: Error Timeout:  (propagating from src/components/stages/CCDataSetupStage.cpp, function doHandleRequest(), line 68)
  3297 Sep  1 11:20:59 tx2 nvargus-daemon[17689]: SCF: Error Timeout:  (propagating from src/components/stages/OrderedStage.cpp, function doExecute(), line 158)
  3298 Sep  1 11:20:59 tx2 nvargus-daemon[17689]: SCF: Error Timeout: Sending critical error event (in src/api/Session.cpp, function sendErrorEvent(), line 990)

couple of suggestion as below,

  1. Could you please verify with a normal shutdown sequence? Please refer to the MMAPI sample application; it has a frame-count setting for terminating the application.
  2. You may also contact Leopard Imaging to report the issue.

@JerryChang: No, the camera sensor is always on. Also, this happens not just when I hit Ctrl-C but when I programmatically set the pipeline to Gst.State.NULL in Python - that’s where the freeze happens. Are you saying you think this is a camera driver issue and not an nvargus-daemon one? It’s very hard to say since I don’t see obvious errors from the driver itself.

I have sent Leopard Imaging this thread as a reference.

I can look at the MMAPI sample application, but my interest is mainly in GStreamer.

hello alex.sack,

Understood: you would like to terminate the sensor streaming from user space. I suggest you refer to the MMAPI samples.
Please check the termination procedures in the sample application,
for example,
waitForIdle(), reset(), thread.shutdown()…etc.

This does not apply to GStreamer. All of this is using GStreamer’s Python API.

Why not try it on your end? It’s easily reproducible.

Can you ask the developer on why nvargus-daemon crashes? That seems like a bug.

hello alex.sack,

I suggest you check your GStreamer Python implementation, since I cannot reproduce the Argus hang issue when launching with either GStreamer or the MMAPI sample applications.

What two cameras were you using? I am wondering if this is a driver issue since the driver is fairly new.

I sincerely doubt my Python implementation is causing nvargus-daemon to crash (and regardless, any fatal crash of nvargus-daemon is a bug in my book).

Actually @JerryChang, I think at any moment you set the Gst.State.NULL and nvargus-daemon spews:

Sep 5 15:06:21 tx2 nvargus-daemon[30372]: SCF: Error Timeout: (propagating from src/services/gl/EGLStreamProducer.cpp, function getBuffer(), line 349)
Sep 5 15:06:21 tx2 nvargus-daemon[30372]: SCF: Error Timeout: (propagating from src/components/CaptureContainerImpl.cpp, function assignAllBuffersFromStream(), line 230)
Sep 5 15:06:21 tx2 nvargus-daemon[30372]: SCF: Error Timeout: (propagating from src/components/stages/CCDataSetupStage.cpp, function doHandleRequest(), line 68)
Sep 5 15:06:21 tx2 nvargus-daemon[30372]: SCF: Error Timeout: (propagating from src/components/stages/OrderedStage.cpp, function doExecute(), line 158)
Sep 5 15:06:21 tx2 nvargus-daemon[30372]: SCF: Error Timeout: Sending critical error event (in src/api/Session.cpp, function sendErrorEvent(), line 990)
Sep 5 15:06:21 tx2 nvargus-daemon[30372]: SCF: Error InvalidState: Session has suffered a critical failure (in src/api/Session.cpp, function capture(), line 667)

and then this continuously:

Sep 5 15:06:21 tx2 nvargus-daemon[30372]: (Argus) Error InvalidState: (propagating from src/api/ScfCaptureThread.cpp, function run(), line 109)
Sep 5 15:06:21 tx2 nvargus-daemon[30372]: SCF: Error InvalidState: Session has suffered a critical failure (in src/api/Session.cpp, function capture(), line 667)
Sep 5 15:06:21 tx2 nvargus-daemon[30372]: (Argus) Error InvalidState: (propagating from src/api/ScfCaptureThread.cpp, function run(), line 109)
Sep 5 15:06:21 tx2 nvargus-daemon[30372]: SCF: Error InvalidState: Session has suffered a critical failure (in src/api/Session.cpp, function capture(), line 667)
Sep 5 15:06:21 tx2 nvargus-daemon[30372]: (Argus) Error InvalidState: (propagating from src/api/ScfCaptureThread.cpp, function run(), line 109)
Sep 5 15:06:21 tx2 nvargus-daemon[30372]: SCF: Error InvalidState: Session has suffered a critical failure (in src/api/Session.cpp, function capture(), line 667)
Sep 5 15:06:21 tx2 nvargus-daemon[30372]: (Argus) Error InvalidState: (propagating from src/api/ScfCaptureThread.cpp, function run(), line 109)
Sep 5 15:06:21 tx2 nvargus-daemon[30372]: SCF: Error InvalidState: Session has suffered a critical failure (in src/api/Session.cpp, function capture(), line 667)
Sep 5 15:06:21 tx2 nvargus-daemon[30372]: (Argus) Error InvalidState: (propagating from src/api/ScfCaptureThread.cpp, function run(), line 109)

This is not really great, regardless of whether it’s client-caused (and I don’t know why setting the pipeline state to NULL should ever cause this).

CAN YOU PLEASE ASK WHAT ARE THE CAUSES OF THESE ERROR MESSAGES?

hello alex.sack,

What two cameras were you using? I am wondering if this is a driver issue since the driver is fairly new.
I’m testing with an OV5693 multiple-camera board. You could verify with the standard v4l2 controls to check whether this is a driver issue.
for example,

v4l2-ctl -d /dev/video0 --set-fmt-video=width=2592,height=1944,pixelformat=RG10 --set-ctrl bypass_mode=0 --stream-mmap --stream-count=1000
v4l2-ctl -d /dev/video1 --set-fmt-video=width=2592,height=1944,pixelformat=RG10 --set-ctrl bypass_mode=0 --stream-mmap --stream-count=1000

Consider the data flow: the sensor output streams to the SCF library; the SCF library converts it and allocates buffers to save the sensor frames into EGL buffers; the GStreamer application then consumes these buffers.
Instead of just setting NULL, you should send an end-of-stream signal before shutting down the pipeline.

suggest you might also check similar discussion thread for working with python+gstreamer, Topic 1025356.
thanks

Did you read anything above this post? Please re-read comment #3, i.e. I am sending EOS, catching it, and then setting pipeline state to NULL. But even setting it to PAUSED then READY like gst-launch-1.0 causes the hang.

I’ll try the commands above. I get an error:

Sep 6 08:00:17 tx2 kernel: [235353.896573] tegra-vi4 15700000.vi: tegra_channel_error_recovery: attempting to reset the capture channel
Sep 6 08:00:17 tx2 kernel: [235353.907790] nvcsi 150c0000.nvcsi: csi4_stream_check_status (0) INTR_STATUS 0x00000004
Sep 6 08:00:17 tx2 kernel: [235353.915837] nvcsi 150c0000.nvcsi: csi4_stream_check_status (0) ERR_INTR_STATUS 0x00000004
Sep 6 08:00:17 tx2 kernel: [235353.973781] tegra-vi4 15700000.vi: PXL_SOF syncpt timeout! err = -11
Sep 6 08:00:17 tx2 kernel: [235353.980505] tegra-vi4 15700000.vi: tegra_channel_error_recovery: attempting to reset the capture channel
Sep 6 08:00:17 tx2 kernel: [235353.992859] nvcsi 150c0000.nvcsi: csi4_stream_check_status (2) INTR_STATUS 0x00000004
Sep 6 08:00:17 tx2 kernel: [235354.000968] nvcsi 150c0000.nvcsi: csi4_stream_check_status (2) ERR_INTR_STATUS 0x00000004
Sep 6 08:00:17 tx2 kernel: [235354.125767] tegra-vi4 15700000.vi: PXL_SOF syncpt timeout! err = -11
Sep 6 08:00:17 tx2 kernel: [235354.132434] tegra-vi4 15700000.vi: tegra_channel_error_recovery: attempting to reset the capture channel
Sep 6 08:00:17 tx2 kernel: [235354.213729] tegra-vi4 15700000.vi: PXL_SOF syncpt timeout! err = -11
Sep 6 08:00:17 tx2 kernel: [235354.220276] tegra-vi4 15700000.vi: tegra_channel_error_recovery: attempting to reset the capture channel
Sep 6 08:00:17 tx2 kernel: [235354.230155] nvcsi 150c0000.nvcsi: csi4_stream_check_status (2) INTR_STATUS 0x00000004
Sep 6 08:00:17 tx2 kernel: [235354.238230] nvcsi 150c0000.nvcsi: csi4_stream_check_status (2) ERR_INTR_STATUS 0x00000004

Kinda sits here:

Thread 7 (Thread 0x7fa19481f0 (LWP 11781)):
#0 0x0000007fb7f9522c in futex_wait_cancelable (private=, expected=0, futex_word=0x13116a8) at …/sysdeps/unix/sysv/linux/futex-internal.h:88
#1 0x0000007fb7f9522c in __pthread_cond_wait_common (abstime=0x0, mutex=0x1311650, cond=0x1311680) at pthread_cond_wait.c:502
#2 0x0000007fb7f9522c in __pthread_cond_wait (cond=0x1311680, mutex=0x1311650) at pthread_cond_wait.c:655
#3 0x0000007fae96ffdc in () at /usr/lib/aarch64-linux-gnu/tegra/libnvos.so
#4 0x0000007fae96eb60 in NvOsSemaphoreWaitTimeout () at /usr/lib/aarch64-linux-gnu/tegra/libnvos.so
#5 0x0000007fa1bcad08 in () at /usr/lib/aarch64-linux-gnu/tegra/libnvomx.so
#6 0x0000007fae96f628 in () at /usr/lib/aarch64-linux-gnu/tegra/libnvos.so
#7 0x0000007fb7f8f088 in start_thread (arg=0x7fffffad3f) at pthread_create.c:463
#8 0x0000007fb7db14ec in thread_start () at …/sysdeps/unix/sysv/linux/aarch64/clone.S:78

Then nvargus-daemon gets into some error state.

Whether support can reproduce this or not, I think the authors of nvargus-daemon need to explain this crash/error state above.

Here is a script that reproduces the issue:

https://drive.google.com/file/d/1IWqQOeVqt-sifhWBjZnCBK1bjePJtKev/view?usp=sharing

Hi,
Please upgrade to r32.2. We have a fix for 1051362 in nvarguscamerasrc. That issue is similar to this one in that the pipeline cannot be terminated.

hello alex.sack,

FYI,
I also found it works normally by removing 2nd nvarguscamerasrc from the RECORD_PIPELINE.

However, you might want to review the commands in your Python script,
since I have also verified that enabling dual camera sources for video encoding works.
for example,

$ gst-launch-1.0 nvarguscamerasrc sensor-id=0 num-buffers=300 ! 'video/x-raw(memory:NVMM), width=2592, height=1944, framerate=30/1' ! nvtee ! omxh264enc bitrate=20000000 ! qtmux ! filesink location=video0.mp4  nvarguscamerasrc sensor-id=1 num-buffers=300 ! 'video/x-raw(memory:NVMM), width=2592, height=1944, framerate=30/1' ! nvtee ! omxh264enc bitrate=20000000 ! qtmux ! filesink location=video1.mp4