Argus camera instability issue

My TX2 is connected to 3 AR0144 cameras.
I implemented a test app based on the Argus camera API.
I then ran a power-up/power-down test 200 times
and found instability issues.

  1. Failed to create CameraProvider (about 10 times)
  2. Acquiring a frame failed on the very first frame (about 10 times)
  3. Acquiring a frame failed after some frames had already been acquired (about 10 times)

The following is my source code.

/**
 * Creates the Argus CameraProvider, enumerates the attached camera devices
 * and marks every per-channel dmabuf slot as "not yet allocated" (-1).
 *
 * A constructor cannot report failure through a return value. When the
 * provider (or its ICameraProvider interface) cannot be obtained, the error
 * is logged and construction stops early: _icamera_provider stays null and
 * the device list is not populated.
 */
ArgusCamera::ArgusCamera() {
  // Initialise the dmabuf slots first so they hold the -1 sentinel even when
  // provider creation fails below.
  for (int i = 0; i < MAX_CHANNEL; i++) {
    _dmabuf[i] = -1;
  }

  _camera_provider = UniqueObj<CameraProvider>(CameraProvider::create());
  _icamera_provider = interface_cast<ICameraProvider>(_camera_provider);
  if (!_icamera_provider) {
    MLOG(AGENT, FATAL, "Failed to create CameraProvider");
    // Do not fall through: the calls below would dereference a null pointer.
    return;
  }

  // Get the camera devices
  if (_icamera_provider->getCameraDevices(&_camera_devices) != STATUS_OK) {
    MLOG(AGENT, FATAL, "Failed to get camera devices");
  }
  if (_camera_devices.size() == 0) {
    MLOG(AGENT, INFO, "No cameras available");
  }
  // size() is a size_t; cast so the argument matches the %ld conversion.
  MLOG(AGENT, INFO, "_camera_devices.size() = %ld",
       static_cast<long>(_camera_devices.size()));
}

/**
 * Creates a capture session for one camera and starts a repeating capture.
 *
 * Steps: create the CaptureSession, create an EGL output stream
 * (YCbCr 4:2:0, DEFAULT_WIDTH x DEFAULT_HEIGHT), attach a FrameConsumer,
 * build a Request with the frame duration derived from CAPTURE_FPS and,
 * optionally, a fixed exposure, then submit it with repeat().
 *
 * @param dev_index     Index into the enumerated camera device list.
 * @param exposure_mode 1 selects manual exposure (AE locked and the exposure
 *                      time range pinned to exposure_val); any other value
 *                      leaves the auto-exposure settings untouched.
 * @param exposure_val  Exposure time used when exposure_mode == 1.
 *                      NOTE(review): presumably nanoseconds, as expected by
 *                      ISourceSettings::setExposureTimeRange — confirm.
 * @return true when the repeating request was submitted, false on any
 *         failure (the reason is logged).
 */
bool ArgusCamera::create_session(unsigned int dev_index, int exposure_mode,
                                 int exposure_val) {
  if (!_icamera_provider) {
    MLOG(AGENT, FATAL, "CameraProvider is not available");
    return false;
  }

  // Valid indices are [0, size()). The previous `>` comparison accepted
  // dev_index == size() and then read one element past the device list.
  if (dev_index >= _camera_devices.size()) {
    MLOG(AGENT, FATAL, "Invalid device index");
    return false;
  }

  // create the new capture session
  _session[dev_index] = UniqueObj<CaptureSession>(
      _icamera_provider->createCaptureSession(_camera_devices[dev_index]));
  if (!_session[dev_index]) {
    MLOG(AGENT, FATAL, "Failed to create CaptureSession");
    return false;
  }
  // createOutputStream
  ICaptureSession *iCaptureSession =
      interface_cast<ICaptureSession>(_session[dev_index]);
  if (!iCaptureSession) {
    MLOG(AGENT, FATAL, "Failed to get ICaptureSession interface");
    return false;
  }
  UniqueObj<OutputStreamSettings> streamSettings(
      iCaptureSession->createOutputStreamSettings(STREAM_TYPE_EGL));
  IEGLOutputStreamSettings *iEGLStreamSettings =
      interface_cast<IEGLOutputStreamSettings>(streamSettings);
  if (!iEGLStreamSettings) {
    MLOG(AGENT, FATAL, "Failed to create OutputStreamSettings");
    return false;
  }
  iEGLStreamSettings->setPixelFormat(PIXEL_FMT_YCbCr_420_888);
#if 0
  // Disabled experiment: take the EGL display from an NvEglRenderer.
  NvEglRenderer *renderer = NvEglRenderer::createEglRenderer(
      "renderer0", DEFAULT_WIDTH, DEFAULT_HEIGHT, 0, 0);
  iEGLStreamSettings->setEGLDisplay(renderer->getEGLDisplay());
#endif
#if 0
  // Disabled experiment: use the default EGL display.
  EGLDisplay eglDisplay = EGL_NO_DISPLAY;
  // Get default EGL display
  eglDisplay = eglGetDisplay(EGL_DEFAULT_DISPLAY);
  if (eglDisplay == EGL_NO_DISPLAY) {
    printf("Cannot get EGL display.\n");
    // This function returns bool; EXIT_FAILURE (non-zero) would read as true.
    return false;
  }
  iEGLStreamSettings->setEGLDisplay(eglDisplay);
#endif
  iEGLStreamSettings->setResolution(
      Size2D<uint32_t>(DEFAULT_WIDTH, DEFAULT_HEIGHT));
  //    iEGLStreamSettings->setEGLDisplay(g_display.get());
  //    iEGLStreamSettings->setMetadataEnable(true);

  UniqueObj<OutputStream> outputStream(
      iCaptureSession->createOutputStream(streamSettings.get()));
  if (!outputStream) {
    MLOG(AGENT, FATAL, "Failed to create OutputStream");
    return false;
  }
  // Hand ownership of the stream over to the per-device member slot.
  _output_stream[dev_index].reset(outputStream.release());

  // Create the FrameConsumer.
  _consumer[dev_index] = UniqueObj<FrameConsumer>(
      FrameConsumer::create(_output_stream[dev_index].get()));
  if (!_consumer[dev_index]) {
    MLOG(AGENT, FATAL, "Failed to create FrameConsumer");
    return false;
  }

  // Create capture request and enable output stream.
  UniqueObj<Request> request(iCaptureSession->createRequest());
  IRequest *iRequest = interface_cast<IRequest>(request);
  if (!iRequest) {
    MLOG(AGENT, FATAL, "Failed to create Request");
    return false;
  }

  iRequest->enableOutputStream(_output_stream[dev_index].get());

  // set fps
  ISourceSettings *iSourceSettings =
      interface_cast<ISourceSettings>(iRequest->getSourceSettings());
  if (!iSourceSettings) {
    MLOG(AGENT, FATAL, "Failed to get source settings interface");
    return false;
  }
  // Frame duration is 1e9 / fps; make the double -> integer truncation
  // explicit instead of relying on an implicit narrowing conversion.
  const uint64_t frameDuration = static_cast<uint64_t>(1e9 / CAPTURE_FPS);
  if (iSourceSettings->setFrameDurationRange(
          Range<uint64_t>(frameDuration)) != STATUS_OK) {
    MLOG(AGENT, FATAL, "Failed to setFrameDurationRange");
    return false;
  }

  // set manual exposure
  if (exposure_mode == 1) {
    // A negative value would wrap to a huge unsigned exposure time.
    if (exposure_val < 0) {
      MLOG(AGENT, FATAL, "Invalid exposure value");
      return false;
    }

    // get the autocontrol settings so auto-exposure can be locked
    IAutoControlSettings *iAutoControlSettings =
        interface_cast<IAutoControlSettings>(
            iRequest->getAutoControlSettings());
    if (!iAutoControlSettings) {
      MLOG(AGENT, FATAL, "Failed to get IAutoControlSettings interface");
      return false;
    }

    // lock AE
    if (iAutoControlSettings->setAeLock(true) != STATUS_OK) {
      MLOG(AGENT, FATAL, "Failed to set AE lock");
      return false;
    }

    // Reuse the source settings interface obtained above; min == max pins
    // the exposure time to a single value.
    if (iSourceSettings->setExposureTimeRange(Range<uint64_t>(
            static_cast<uint64_t>(exposure_val))) != STATUS_OK) {
      MLOG(AGENT, FATAL, "Failed to set exposure time range");
      return false;
    }
  }

  // Submit capture requests.
  if (iCaptureSession->repeat(request.get()) != STATUS_OK) {
    MLOG(AGENT, FATAL, "Failed to start repeat capture request");
    return false;
  }
  return true;
}

bool ArgusCamera::capture_frame(unsigned int dev_index, unsigned char *buf) {
  IFrameConsumer *iFrameConsumer =
      interface_cast<IFrameConsumer>(_consumer[dev_index]);

  // Acquire a frame.
  UniqueObj<Frame> frame(iFrameConsumer->acquireFrame(1000 * 1000 * 1000));
  IFrame *iFrame = interface_cast<IFrame>(frame);
  if (!iFrame) {
    MLOG(AGENT, FATAL, "Acquire a frame failed");
    return false;
  }

  // Get the IImageNativeBuffer extension interface.
  NV::IImageNativeBuffer *iNativeBuffer =
      interface_cast<NV::IImageNativeBuffer>(iFrame->getImage());
  if (!iNativeBuffer) {
    MLOG(AGENT, FATAL, "IImageNativeBuffer not supported by Image");
    return false;
  }

  // If we don't already have a buffer, create one from this image.
  // Otherwise, just blit to our buffer.
  // the fisrt time, we do not need to do copyToNvBuffer
  if (_dmabuf[dev_index] == -1) {
    _dmabuf[dev_index] = iNativeBuffer->createNvBuffer(
        Size2D<uint32_t>(DEFAULT_WIDTH, DEFAULT_HEIGHT),
        NvBufferColorFormat_YUV420, NvBufferLayout_Pitch);
    if (!_dmabuf[dev_index]) {
      MLOG(AGENT, FATAL, "Failed to create NvBuffer");
      return false;
    }
  } else if (iNativeBuffer->copyToNvBuffer(_dmabuf[dev_index]) != STATUS_OK) {
    MLOG(AGENT, FATAL, "Failed to copy frame to NvBuffer");
    return false;
  }
  // copy data to out buf
  void *pdata = NULL;

  NvBufferMemMap(_dmabuf[dev_index], 0, NvBufferMem_Read, &pdata);
  NvBufferMemSyncForCpu(_dmabuf[dev_index], 0, &pdata);
  memcpy(buf, pdata, DEFAULT_WIDTH * DEFAULT_HEIGHT);
  NvBufferMemUnMap(_dmabuf[dev_index], 0, &pdata);

  return true;
}

the abnormal log:

2020/02/15 03:39:26.810 INFO AGENT [argus_camera_agent.cpp:207] start argus_camera_agent
2020/02/15 03:39:27.493 INFO AGENT [argus_camera.cpp:50] _camera_devices.size() = 3
2020/02/15 03:39:28.429 INFO AGENT [argus_camera_agent.cpp:112] start to capture first frame
2020/02/15 03:40:02.592 INFO AGENT [argus_camera_agent.cpp:134] camera 0 get 1800 frame
2020/02/15 03:40:02.603 INFO AGENT [argus_camera_agent.cpp:155] camera 1 get 1800 frame
2020/02/15 03:40:02.611 INFO AGENT [argus_camera_agent.cpp:176] camera 2 get 1800 frame
2020/02/15 03:40:32.698 INFO AGENT [argus_camera_agent.cpp:134] camera 0 get 3600 frame
2020/02/15 03:40:32.711 INFO AGENT [argus_camera_agent.cpp:155] camera 1 get 3600 frame
2020/02/15 03:40:32.718 INFO AGENT [argus_camera_agent.cpp:176] camera 2 get 3600 frame
2020/02/15 03:40:48.538 FATAL AGENT [argus_camera.cpp:206] Acquire a frame failed
2020/02/15 03:40:48.538 FATAL AGENT [argus_camera_agent.cpp:120] capture_frame failed
2020/02/15 03:40:48.538 FATAL AGENT [argus_camera_agent.cpp:185] capture_frame stopped
2020/02/15 03:40:49.628 INFO AGENT [argus_camera_agent.cpp:207] start argus_camera_agent
2020/02/15 03:40:49.773 FATAL AGENT [argus_camera.cpp:40] Failed to create CameraProvider
2020/02/15 03:40:51.396 INFO AGENT [argus_camera_agent.cpp:207] start argus_camera_agent
2020/02/15 03:40:52.049 INFO AGENT [argus_camera.cpp:50] _camera_devices.size() = 3
2020/02/15 03:40:52.748 INFO AGENT [argus_camera_agent.cpp:112] start to capture first frame

2020/02/15 03:42:29.839 INFO AGENT [argus_camera_agent.cpp:207] start argus_camera_agent
2020/02/15 03:42:30.457 INFO AGENT [argus_camera.cpp:50] _camera_devices.size() = 3
2020/02/15 03:42:31.427 INFO AGENT [argus_camera_agent.cpp:112] start to capture first frame
2020/02/15 03:42:42.702 FATAL AGENT [argus_camera.cpp:206] Acquire a frame failed
2020/02/15 03:42:42.702 FATAL AGENT [argus_camera_agent.cpp:162] capture_frame failed
2020/02/15 03:42:42.702 FATAL AGENT [argus_camera_agent.cpp:185] capture_frame stopped
2020/02/15 03:42:43.851 INFO AGENT [argus_camera_agent.cpp:207] start argus_camera_agent
2020/02/15 03:42:44.563 FATAL AGENT [argus_camera.cpp:40] Failed to create CameraProvider
2020/02/15 03:42:46.163 INFO AGENT [argus_camera_agent.cpp:207] start argus_camera_agent
2020/02/15 03:42:46.774 INFO AGENT [argus_camera.cpp:50] _camera_devices.size() = 3
2020/02/15 03:42:47.393 INFO AGENT [argus_camera_agent.cpp:112] start to capture first frame
2020/02/15 03:43:17.997 INFO AGENT [argus_camera_agent.cpp:134] camera 0 get 1800 frame
2020/02/15 03:43:18.009 INFO AGENT [argus_camera_agent.cpp:155] camera 1 get 1800 frame
2020/02/15 03:43:18.016 INFO AGENT [argus_camera_agent.cpp:176] camera 2 get 1800 frame
2020/02/15 03:43:48.294 INFO AGENT [argus_camera_agent.cpp:134] camera 0 get 3600 frame
2020/02/15 03:43:48.309 INFO AGENT [argus_camera_agent.cpp:155] camera 1 get 3600 frame
2020/02/15 03:43:48.314 INFO AGENT [argus_camera_agent.cpp:176] camera 2 get 3600 frame

the normal log:

2020/02/15 08:15:29.778 INFO AGENT [argus_camera_agent.cpp:207] start argus_camera_agent
2020/02/15 08:15:30.420 INFO AGENT [argus_camera.cpp:50] _camera_devices.size() = 3
2020/02/15 08:15:31.491 INFO AGENT [argus_camera_agent.cpp:112] start to capture first frame
2020/02/15 08:16:04.979 INFO AGENT [argus_camera_agent.cpp:134] camera 0 get 1800 frame
2020/02/15 08:16:04.990 INFO AGENT [argus_camera_agent.cpp:155] camera 1 get 1800 frame
2020/02/15 08:16:04.998 INFO AGENT [argus_camera_agent.cpp:176] camera 2 get 1800 frame
2020/02/15 08:16:34.893 INFO AGENT [argus_camera_agent.cpp:134] camera 0 get 3600 frame
2020/02/15 08:16:34.903 INFO AGENT [argus_camera_agent.cpp:155] camera 1 get 3600 frame
2020/02/15 08:16:34.911 INFO AGENT [argus_camera_agent.cpp:176] camera 2 get 3600 frame
2020/02/15 08:17:05.541 INFO AGENT [argus_camera_agent.cpp:134] camera 0 get 5400 frame
2020/02/15 08:17:05.552 INFO AGENT [argus_camera_agent.cpp:155] camera 1 get 5400 frame
2020/02/15 08:17:05.559 INFO AGENT [argus_camera_agent.cpp:176] camera 2 get 5400 frame

system version:

R32 (release), REVISION: 2.1, GCID: 16294929, BOARD: t186ref, EABI: aarch64, DATE: Tue Aug 13 04:45:36 UTC 2019

syslog:

Feb 15 03:40:47 miivii-tegra systemd[1]: Started PackageKit Daemon.
Feb 15 03:40:47 miivii-tegra nvargus-daemon[5759]: CAM: serial no file already exists, skips storing againSCF: Error NotSupported: AMR Sample data type is error, requested type is IspRawStats* (in src/components/amr/Sample.cpp, function typeError(), line 65)
Feb 15 03:40:47 miivii-tegra nvargus-daemon[5759]: SCF: Error NotSupported:  (in src/components/amr/Sample.cpp, function get(), line 101)
Feb 15 03:40:47 miivii-tegra nvargus-daemon[5759]: SCF: Error NotSupported:  (propagating from src/common/Amr.h, function getSampleObject(), line 488)
Feb 15 03:40:47 miivii-tegra nvargus-daemon[5759]: SCF: Error NotSupported:  (propagating from src/components/ac_stages/AeAfApplyStage.cpp, function translateIspOutStatsToFrd(), line 281)
Feb 15 03:40:47 miivii-tegra nvargus-daemon[5759]: SCF: Error NotSupported:  (propagating from src/components/ac_stages/AeAfApplyStage.cpp, function doHandleRequest(), line 618)
Feb 15 03:40:47 miivii-tegra nvargus-daemon[5759]: SCF: Error NotSupported:  (propagating from src/components/stages/OrderedStage.cpp, function doExecute(), line 137)

Could you help me on this, thank you very much!

Please run the argus_camera app to check whether it reproduces the issue too.

I do one more test to debug this issue.

Test:
1.power up the board
2.login the system through debug serial port
3.run my app(argus_camera_agent)
I do this test for 50 times without any error.

Compare with the previous test:
1.power up the board
2.run my app(argus_camera_agent is launched by systemd)
I do this test for 200 times with 20 times errors.

So I suspect the root cause is the different way my app is launched.
By the way, my test app uses the EGL library,
so I guess the root cause may be related to the EGL library.

My systemd script:

# systemd unit that launches the argus_camera_agent test application.
[Unit]
Description=my test daemon
# Start ordering relative to the Argus daemon.
# NOTE(review): After= presumably only orders start-up and does not wait for
# nvargus-daemon to be ready to serve clients — confirm against systemd.unit(5).
After=nvargus-daemon.service
[Service]
ExecStart=/home/nvidia/work/build/argus_camera_agent
WorkingDirectory=/home/nvidia/work/build
# The agent is relaunched whenever it exits; the abnormal log shows
# "start argus_camera_agent" again shortly after each failure.
Restart=always
Type=simple
User=nvidia
[Install]
# The multi-user.target alternative is kept but disabled.
; WantedBy=multi-user.target
WantedBy=graphical.target

@ShaneCCC I think argus_camera APP also can work well without error.

One more question, I do not know the difference between STREAM_TYPE_BUFFER and STREAM_TYPE_EGL.
Could you explain a little for me?
My test app needs to receive the data from the Argus API and do some calculations without rendering (I do not need to show the camera image on screen).
Based on this scenario, which type is suitable for me?
I am looking forward to your reply very much~

One more test:
1.power up the board
2.run my app(argus_camera_agent is launched by systemd)
I added ExecStartPre=/bin/sleep 30 to my systemd script, which means systemd delays 30 s before launching my test app.
I ran this test 184 times with 1 error.
The reproduction rate decreased from 20/200 to 1/184.
The only difference is that I added "ExecStartPre=/bin/sleep 30".

  1. STREAM_TYPE_BUFFER should be good for your case.

  2. Have a try build your APP by cmake -DDISABLE_MULTIPROCESS=ON

    'cmake ..'
    

    Additional options:
    - If CMake cannot find an include path for any dependencies, it may be
    required to provide them explicitly. Eg:
    'cmake -DOPENGLES_INCLUDE_DIR=/path/to/khronos/includes ..'
    - The DISABLE_MULTIPROCESS option may be provided to use the single-process
    Argus implementation (ie. does not require nvargus-daemon service):
    'cmake -DDISABLE_MULTIPROCESS=ON ..'

1. "STREAM_TYPE_BUFFER should be good for my case"
So you mean that the STREAM_TYPE_EGL type can also work for my scenario, is that right?
As you can see, my source code is based on STREAM_TYPE_EGL.
I have checked the STREAM_TYPE_BUFFER sample source code (tegra_multimedia_api/samples/10_camera_recording); it is more difficult to understand.
If STREAM_TYPE_EGL can also work for my scenario, I will continue to debug this.
2. I am checking this now based on my source code (STREAM_TYPE_EGL type).
Thank you very much for the quick reply.

  1. Have a try build your APP by cmake -DDISABLE_MULTIPROCESS=ON
    I do power up and power down test for 180 times with 6 times error.
    But “Failed to create CameraProvider issue” disappeared.
    All the 6 times error is “Acquire a frame failed”. that is not the first time to acquire a frame. usually after get ~100 frames and then acquire a frame failed.
    syslog:
Feb 19 07:19:53 miivii-tegra argus_camera_agent[7908]: SCF: Error NotSupported: AMR Sample data type is error, requested type is IspRawStats* (in src/components/amr/Sample.cpp, function typeError(), line 65)
Feb 19 07:19:53 miivii-tegra argus_camera_agent[7908]: SCF: Error NotSupported:  (in src/components/amr/Sample.cpp, function get(), line 101)
Feb 19 07:19:53 miivii-tegra argus_camera_agent[7908]: SCF: Error NotSupported:  (propagating from src/common/Amr.h, function getSampleObject(), line 488)
Feb 19 07:19:53 miivii-tegra argus_camera_agent[7908]: SCF: Error NotSupported:  (propagating from src/components/ac_stages/AeAfApplyStage.cpp, function translateIspOutStatsToFrd(), line 281)
Feb 19 07:19:53 miivii-tegra argus_camera_agent[7908]: SCF: Error NotSupported:  (propagating from src/components/ac_stages/AeAfApplyStage.cpp, function doHandleRequest(), line 618)
Feb 19 07:19:53 miivii-tegra argus_camera_agent[7908]: SCF: Error NotSupported:  (propagating from src/components/stages/OrderedStage.cpp, function doExecute(), line 137)
Feb 19 07:19:53 miivii-tegra argus_camera_agent[7908]: SCF: Error NotSupported: Sending critical error event (in src/api/Session.cpp, function sendErrorEvent(), line 990)
Feb 19 07:19:53 miivii-tegra argus_camera_agent[7908]: SCF: Error InvalidState: Session has suffered a critical failure (in src/api/Session.cpp, function capture(), line 667)
Feb 19 07:19:53 miivii-tegra argus_camera_agent[7908]: (Argus) Error InvalidState:  (propagating from src/api/ScfCaptureThread.cpp, function run(), line 109)
Feb 19 07:19:53 miivii-tegra argus_camera_agent[7908]: SCF: Error InvalidState: Session has suffered a critical failure (in src/api/Session.cpp, function capture(), line 667)
Feb 19 07:19:53 miivii-tegra argus_camera_agent[7908]: (Argus) Error InvalidState:  (propagating from src/api/ScfCaptureThread.cpp, function run(), line 109)

Did you try boosting the system?

# Boost the system before re-running the test.
# NOTE(review): mode 0 is presumably the maximum-performance nvpmodel profile
# on this board — confirm with `nvpmodel -q`.
sudo nvpmodel -m 0
sudo jetson_clocks

Also try boosting the nvcsi/vi/isp clocks.

# Lock the VI, ISP and NVCSI clocks and raise each one to its maximum rate.
# Run from a root shell: the debugfs clock nodes are written directly.
sudo su
echo 1 > /sys/kernel/debug/bpmp/debug/clk/vi/mrq_rate_locked
echo 1 > /sys/kernel/debug/bpmp/debug/clk/isp/mrq_rate_locked
echo 1 > /sys/kernel/debug/bpmp/debug/clk/nvcsi/mrq_rate_locked
# Copy each clock's max_rate into its rate node. The earlier form wrote
# ${max_rate}, a variable that was never assigned, so an empty string would
# have been written instead of the value printed by `cat`.
cat /sys/kernel/debug/bpmp/debug/clk/vi/max_rate > /sys/kernel/debug/bpmp/debug/clk/vi/rate
cat /sys/kernel/debug/bpmp/debug/clk/isp/max_rate > /sys/kernel/debug/bpmp/debug/clk/isp/rate
cat /sys/kernel/debug/bpmp/debug/clk/nvcsi/max_rate > /sys/kernel/debug/bpmp/debug/clk/nvcsi/rate

I modify my test app from STREAM_TYPE_EGL to STREAM_TYPE_BUFFER.
tegra_multimedia_api/samples/10_camera_recording is a reference code. I remove encoding feature.
I did power up and power down test, I also can see “Acquire a frame failed”.

after that, I follow your suggestion.
I run the following commands before my argus app last night.

# Boost the clocks, then start the Argus test app in the background.
# Annotations use `#`: `//` is not a shell comment, so the trailing words
# would otherwise be passed as arguments to sleep / nohup.
sleep 10   # sleep 10 sec, i will disable this and do power up and down test again.
jetson_clocks
echo 1 > /sys/kernel/debug/bpmp/debug/clk/vi/mrq_rate_locked
echo 1 > /sys/kernel/debug/bpmp/debug/clk/isp/mrq_rate_locked
echo 1 > /sys/kernel/debug/bpmp/debug/clk/nvcsi/mrq_rate_locked
echo 1036800000 > /sys/kernel/debug/bpmp/debug/clk/vi/rate
echo 1126400000 > /sys/kernel/debug/bpmp/debug/clk/isp/rate
echo 225000000 > /sys/kernel/debug/bpmp/debug/clk/nvcsi/rate
nohup ./argus_camera_agent &   # my test app

I tested power up and power down for 200 times without any errors.
But is boosting the clocks the final solution? In my opinion, insufficient CPU and clock rates will lead to lost frames.

Add the serdes_pix_clk_hz property to the device tree and try setting it much higher than pix_clk_hz.

https://docs.nvidia.com/jetson/l4t/index.html#page/Tegra%2520Linux%2520Driver%2520Package%2520Development%2520Guide%2Fcamera_sensor_prog.html%23wwpID0E0P50HA

Hi, this issue can also be reproduced with serdes_pix_clk_hz set.
My board has 3 AR0144 cameras (1280x720, 60 fps) and one FPGA camera (1280x720, 20 fps).
My AR0144 device tree:

/* AR0144 sensor mode 0: 1280x720, 12-bit Bayer (GRBG), 2 CSI lanes. */
mode0 {
		mclk_khz = "27000";
		num_lanes = "2";
		tegra_sinterface = "serial_d";
		discontinuous_clk = "yes";
		dpcm_enable = "false";
		cil_settletime = "20";

		dynamic_pixel_bit_depth = "12";
		csi_pixel_bit_depth = "12";
		mode_type = "bayer";
		pixel_phase = "grbg";
		pixel_t = "bayer_grbg12";

		active_w = "1280";
		active_h = "720";
		readout_orientation = "0";
		line_length = "1488";
		inherent_gain = "1";
		mclk_multiplier = "4";
		pix_clk_hz = "74250000";
		/*
		 * The property must be spelled serdes_pix_clk_hz and end with ';'.
		 * It was written as "erdes_pix_clk_hz" without a terminator, which
		 * leaves the intended property unset.
		 */
		serdes_pix_clk_hz = "833333333"; /* as you said~ */
		min_gain_val = "1";
		max_gain_val = "31";
		min_hdr_ratio = "1";
		max_hdr_ratio = "64";
		min_framerate = "6";
		max_framerate = "60";
		min_exp_time = "200";
		max_exp_time = "166332";
		embedded_metadata_height = "4";
	};

By the way, my AR0144 is not GMSL or FPD link.

Did you verify that the sensors work stably with v4l2-ctl?
If you have verified that with v4l2-ctl, try adding the set_mode_delay_ms property.

https://docs.nvidia.com/jetson/l4t/index.html#page/Tegra%2520Linux%2520Driver%2520Package%2520Development%2520Guide%2Fcamera_sensor_prog.html%23wwpID0E0T50HA

Did you verify the sensors worked stable by v4l2-ctl?-> I will check.
By the way, the “Acquire a frame failed” issue does not happen on the first frame; it usually happens somewhere between 10 and 200 frames.

Hi wufan10618,

Did you verify the sensors worked stable by v4l2-ctl?-> I will check.

Any result can be shared?