K1 continuous double DMA for I2S audio

I am seeing buffer overruns in ALSA userspace when recording, and suspect I might need to enable continuous double DMA for I2S audio data on a K1 SoC, for receiving TDM audio streams reliably.

I am using the 3.10 NVIDIA L4T 21.4 kernel release.

In the L4T kernels, there is a file:

sound/soc/tegra/tegra_tdm_pcm.c
/*
 * tegra_tdm_pcm.c - Tegra TDM PCM driver
 *
 * Author: Nitin Pai <npai@nvidia.com>
 * Copyright (c) 2012, NVIDIA CORPORATION.  All rights reserved.
 *
 * Based on code copyright/by:
 *
 * Copyright (c) 2009-2010, NVIDIA CORPORATION.  All rights reserved.
 * Scott Peterson <speterson@nvidia.com>
 * Stephen Warren <swarren@nvidia.com>
 * Vijay Mali <vmali@nvidia.com>
 *
 * Copyright (C) 2010 Google, Inc.
 * Iliyan Malchev <malchev@google.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA
 *
 */

#include <linux/module.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <sound/core.h>
#include <sound/pcm.h>
#include <sound/pcm_params.h>
#include <sound/soc.h>

#include "tegra_pcm.h"

#define DRV_NAME "tegra-tdm-pcm-audio"

/*
 * Hardware constraints advertised to ALSA for the TDM PCM device.
 *
 * Only interleaved, signed 16-bit little-endian audio with 8 to 16 channels
 * is accepted.  The ring buffer is always split into exactly four periods of
 * 8 KiB to 16 KiB each, so the largest buffer is 4 * 16 KiB = 64 KiB.
 */
static const struct snd_pcm_hardware tegra_tdm_pcm_hardware = {
	.info			= SNDRV_PCM_INFO_INTERLEAVED |
				  SNDRV_PCM_INFO_MMAP |
				  SNDRV_PCM_INFO_MMAP_VALID |
				  SNDRV_PCM_INFO_PAUSE |
				  SNDRV_PCM_INFO_RESUME,
	.formats		= SNDRV_PCM_FMTBIT_S16_LE,
	.channels_min		= 8,
	.channels_max		= 16,
	.periods_min		= 4,
	.periods_max		= 4,
	.period_bytes_min	= 8 * 1024,
	.period_bytes_max	= 16 * 1024,
	.buffer_bytes_max	= 4 * 16 * 1024,
	.fifo_size		= 4,
};

/*
 * PCM .open callback.
 *
 * Hands the substream to the shared Tegra PCM code, requesting
 * TEGRA_DMA_MODE_CONTINUOUS_DOUBLE and the TDM hardware constraints.
 * Returns whatever tegra_pcm_allocate() returns.
 */
static int tegra_tdm_pcm_open(struct snd_pcm_substream *substream)
{
	int ret;

	ret = tegra_pcm_allocate(substream, TEGRA_DMA_MODE_CONTINUOUS_DOUBLE,
				 &tegra_tdm_pcm_hardware);
	return ret;
}

/*
 * PCM .close callback: forwards to the shared tegra_pcm_close() and
 * propagates its return value.
 */
static int tegra_tdm_pcm_close(struct snd_pcm_substream *substream)
{
	int ret = tegra_pcm_close(substream);

	return ret;
}

/*
 * PCM .hw_params callback: forwards the substream and the negotiated
 * parameters to the shared tegra_pcm_hw_params() and propagates its result.
 */
static int tegra_tdm_pcm_hw_params(struct snd_pcm_substream *substream,
				struct snd_pcm_hw_params *params)
{
	int ret = tegra_pcm_hw_params(substream, params);

	return ret;
}

/*
 * PCM .hw_free callback: forwards to the shared tegra_pcm_hw_free() and
 * propagates its return value.
 */
static int tegra_tdm_pcm_hw_free(struct snd_pcm_substream *substream)
{
	int ret = tegra_pcm_hw_free(substream);

	return ret;
}

/*
 * PCM .trigger callback: passes the trigger command unchanged to the shared
 * tegra_pcm_trigger() and propagates its return value.
 */
static int tegra_tdm_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
{
	int ret = tegra_pcm_trigger(substream, cmd);

	return ret;
}

/*
 * PCM .mmap callback: forwards the substream and the target VMA to the
 * shared tegra_pcm_mmap() and propagates its return value.
 */
static int tegra_tdm_pcm_mmap(struct snd_pcm_substream *substream,
				struct vm_area_struct *vma)
{
	int ret = tegra_pcm_mmap(substream, vma);

	return ret;
}

/*
 * PCM operations table for the TDM platform driver.
 *
 * .ioctl and .pointer use the generic ALSA / shared Tegra helpers directly;
 * every other entry goes through a local tegra_tdm_pcm_*() wrapper.
 */
static struct snd_pcm_ops tegra_tdm_pcm_ops = {
	/* Stream lifetime */
	.open		= tegra_tdm_pcm_open,
	.close		= tegra_tdm_pcm_close,

	/* Hardware parameter setup and teardown */
	.hw_params	= tegra_tdm_pcm_hw_params,
	.hw_free	= tegra_tdm_pcm_hw_free,

	/* Runtime control and buffer access */
	.trigger	= tegra_tdm_pcm_trigger,
	.pointer	= tegra_pcm_pointer,
	.mmap		= tegra_tdm_pcm_mmap,
	.ioctl		= snd_pcm_lib_ioctl,
};

static int tegra_tdm_pcm_new(struct snd_soc_pcm_runtime *rtd)
{
	return tegra_pcm_dma_allocate(rtd ,
				tegra_tdm_pcm_hardware.buffer_bytes_max);
}

/*
 * Platform .pcm_free callback: forwards to the shared tegra_pcm_free().
 *
 * This function returns void, so the helper is called as a plain statement;
 * "return <expression>;" is not valid ISO C in a void function.
 */
static void tegra_tdm_pcm_free(struct snd_pcm *pcm)
{
	tegra_pcm_free(pcm);
}

/*
 * ASoC platform driver description for the TDM PCM device: PCM buffer
 * setup/teardown hooks plus the PCM operations table.
 */
struct snd_soc_platform_driver tegra_tdm_pcm_platform = {
	.pcm_new	= tegra_tdm_pcm_new,
	.pcm_free	= tegra_tdm_pcm_free,
	.ops		= &tegra_tdm_pcm_ops,
};

static int __devinit tegra_tdm_pcm_platform_probe(struct platform_device *pdev)
{
	return snd_soc_register_platform(&pdev->dev, &tegra_tdm_pcm_platform);
}

static int __devexit tegra_tdm_pcm_platform_remove(struct platform_device *pdev)
{
	snd_soc_unregister_platform(&pdev->dev);
	return 0;
}

static struct platform_driver tegra_tdm_pcm_driver = {
	.driver = {
		.name = DRV_NAME,
		.owner = THIS_MODULE,
	},
	.probe = tegra_tdm_pcm_platform_probe,
	.remove = __devexit_p(tegra_tdm_pcm_platform_remove),
};

/*
 * Module entry point: registers the TDM PCM platform driver and returns the
 * result of platform_driver_register().
 */
static int __init snd_tegra_tdm_pcm_init(void)
{
	int ret = platform_driver_register(&tegra_tdm_pcm_driver);

	return ret;
}
module_init(snd_tegra_tdm_pcm_init);

/* Module exit point: unregisters the TDM PCM platform driver. */
static void __exit snd_tegra_tdm_pcm_exit(void)
{
	struct platform_driver *drv = &tegra_tdm_pcm_driver;

	platform_driver_unregister(drv);
}
module_exit(snd_tegra_tdm_pcm_exit);

/*
 * Module metadata.  The description names the TDM variant explicitly so it
 * matches the file header ("Tegra TDM PCM driver") rather than reading as
 * the generic PCM driver.
 */
MODULE_AUTHOR("Nitin Pai <npai@nvidia.com>");
MODULE_DESCRIPTION("Tegra TDM PCM ASoC driver");
MODULE_LICENSE("GPL");
MODULE_ALIAS("platform:" DRV_NAME);

This seems to wrap the normal PCM setup callbacks, while changing buffer sizes, and enabling double continuous mode DMA.

However, this file refers to tegra_pcm_allocate and TEGRA_DMA_MODE_CONTINUOUS_DOUBLE, neither of which exists at any ref in the 3.10 L4T kernel repo. I could find them in only one other place on the internet, so this file looks like a singleton.

https://github.com/Kahlo007/cm_kernel_lenovo_kai/blob/master/sound/soc/tegra/tegra_pcm.c

I was just curious if this had been tried for this kernel, or if there’s a simple way to enable continuous double DMA for the audio PCM.

Thanks,

Ed Cragg

Hi Ed,

I cannot say that I am familiar with this file and it is not included in our more recent releases. However, rather than using this continuous double buffer feature, you can just try to increase the DMA buffering in general …

diff --git a/sound/soc/tegra/tegra_pcm.c b/sound/soc/tegra/tegra_pcm.c
index 51903fdda089..4eb8ea78d6bd 100644
--- a/sound/soc/tegra/tegra_pcm.c
+++ b/sound/soc/tegra/tegra_pcm.c
@@ -56,10 +56,10 @@ static const struct snd_pcm_hardware tegra_pcm_hardware = {
        .channels_min           = 1,
        .channels_max           = 2,
        .period_bytes_min       = 128,
-       .period_bytes_max       = PAGE_SIZE * 2,
+       .period_bytes_max       = PAGE_SIZE * 4,
        .periods_min            = 1,
        .periods_max            = 8,
-       .buffer_bytes_max       = PAGE_SIZE * 8,
+       .buffer_bytes_max       = PAGE_SIZE * 16,
        .fifo_size              = 4,
 };

Also, for testing it can be worth setting the cpufreq governor to ‘performance’ (if not already) …

$ echo 'performance' | sudo tee /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor

Regards,
Jon

Hi Jon,

Thanks for your response. I have already tried increasing the buffer size, and although it reduces the number of overruns, it doesn’t eliminate them.

Also cpufreq is already set to performance,

# cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
performance

Cheers,
Ed

Hi Ed,

For testing, can you also try writing the captured file to a RAM disk?

sudo mkdir /mnt/tegra_audio_test
sudo mount -t tmpfs -o size=100m tmpfs /mnt/tegra_audio_test

Then direct arecord to write to /mnt/tegra_audio_test. Obviously, you can change the size of the RAM disk to meet your needs. I assume that you are currently writing to the eMMC and I wanted to eliminate this as the bottleneck.

Regards,
Jon

Hi Jon,

I have previously been writing to /dev/null, but I just tried the RAM disk as you suggested, and it produces a similar number of overruns to /dev/null.

Thanks,
Ed

Hi Ed,

What is the frequency of the following …

ubuntu@tegra-ubuntu:~$ sudo cat /sys/kernel/debug/clock/apbdma/rate
12000000
ubuntu@tegra-ubuntu:~$ sudo cat /sys/kernel/debug/clock/apbif/rate
12000000
ubuntu@tegra-ubuntu:~$ sudo cat /sys/kernel/debug/clock/d_audio/rate
11289600

If the DMA is only operating at 12MHz, I am wondering if it is fast enough.

Regards,
Jon

Hi Jon,

This is what I get:

root@mghd271-tk1:~# cat /sys/kernel/debug/clock/apbdma/rate
12000000
root@mghd271-tk1:~# cat /sys/kernel/debug/clock/apbif/rate
12000000
root@mghd271-tk1:~# cat /sys/kernel/debug/clock/d_audio/rate
24576000

I am currently using 8-channel, 96 kHz, 32-bit audio, hence the fast audio clock. I should also be able to try a 16-bit sample size soon, which should take the audio clock down to 12288000 Hz.

Cheers,
Ed

As far as I can see, the APBDMA parent clock is clk_m, whose rate appears to be tied to the crystal frequency (in the kernel, it is autodetected from the CLK_RST_CONTROLLER_OSC_FREQ_DET_0 register, against the RTC 32.768 kHz clock source).

From that I’m guessing I can’t increase clk_m… but can the APBDMA be re-parented to a faster clock, and is that worth doing?

I guess the DMA clock must be tied to the AHB and the APB clocks, since it bridges these two buses.

Thanks,
Ed

Hi Ed,

I will check on this and let you know.

Jon

Hi Ed,

So here is what I found out. The parent clock for the apbdma is ‘pclk’, not ‘clk_m’ as shown by debugfs. This is a bug in the kernel. It was recently fixed in the mainline kernel by the following patch …

https://lkml.org/lkml/2017/10/3/979

The fix does not apply to the L4T v3.10 kernel, but because the apbdma clock parent is fixed, it is not critical that we fix the debugfs entry for it. If you look at the ‘pclk’ rate, it is probably something like 102MHz, so this should be fine.

If you are getting overruns, then this suggests that the CPU is not keeping up with the DMA. Is ALSA arecord reporting the overrun?

Can you tell me how many CPUs are online?

$ cat /sys/devices/system/cpu/online

Regards,
Jon