Sorry for late reply. Please follow the steps to start EP mode DMA.
Only C5 controller can be used as it can own open slot
Two Xavier devkit boards are required with one configured for C5’s RP operation and other configured for C5’s EP operation
Need finger-to-finger card to connect two devkits using their PCIe x8/x16 open slot
Please flash one devkit with default ODM data to operate C5 in Root port mode and other with bit-12 set to ‘1’ to operate C5 in Endpoint mode
patch 1
Subject: [PATCH] DNI: arm64: configs: Enable PCIe DMA test framework
Enables test framework for embedded DMA engine of PCIe IP to perform
read and write operations
---
diff --git a/arch/arm64/configs/tegra_defconfig b/arch/arm64/configs/tegra_defconfig
index 53f712a..ec693c6 100644
--- a/arch/arm64/configs/tegra_defconfig
+++ b/arch/arm64/configs/tegra_defconfig
@@ -51,6 +51,7 @@
CONFIG_PCI_STUB=m
CONFIG_PCI_IOV=y
CONFIG_PCIE_TEGRA=y
+CONFIG_PCIE_TEGRA_DW_DMA_TEST=y
CONFIG_PCIE_TEGRA_EP=y
CONFIG_PCI_TEGRA=y
CONFIG_PCI_ENDPOINT=y
patch 2
Subject: [PATCH] PCI: tegra: Fix DMA test framework build issue
Fix DMA test framework build issue by defining 'struct dw_pcie *'
pointer correctly
---
diff --git a/drivers/pci/dwc/pcie-tegra.c b/drivers/pci/dwc/pcie-tegra.c
index 6baa78f..ed9d1ef 100644
--- a/drivers/pci/dwc/pcie-tegra.c
+++ b/drivers/pci/dwc/pcie-tegra.c
@@ -1038,6 +1038,7 @@
#ifdef CONFIG_PCIE_TEGRA_DW_DMA_TEST
static int dma_write(struct tegra_pcie_dw *pcie, struct dma_tx *tx)
{
+ struct dw_pcie *pci = &pcie->pci;
struct device *dev = pcie->dev;
u32 val = 0, bit = 0;
int ret = 0;
@@ -1164,6 +1165,7 @@
static int dma_read(struct tegra_pcie_dw *pcie, struct dma_tx *tx)
{
+ struct dw_pcie *pci = &pcie->pci;
struct device *dev = pcie->dev;
u32 val = 0, bit = 0;
int ret = 0;
patch 3
Subject: [PATCH] DNI: PCI: tegra: Setup DMA test framework for perf measurement
---
diff --git a/drivers/misc/tegra-pcie-ep-mem.c b/drivers/misc/tegra-pcie-ep-mem.c
index 1f04f9f..821ea1e 100644
--- a/drivers/misc/tegra-pcie-ep-mem.c
+++ b/drivers/misc/tegra-pcie-ep-mem.c
@@ -90,7 +90,7 @@
#define DMA_LLP_LOW_OFF_RDCH (0x1C + 0x100)
#define DMA_LLP_HIGH_OFF_RDCH (0x20 + 0x100)
-static unsigned long alloc_size = 0xA00000;
+static unsigned long alloc_size = 0x20000000;
module_param(alloc_size, ulong, 0660);
MODULE_PARM_DESC(alloc_size, "Allocation Size");
@@ -457,10 +457,12 @@
}
/* compare copied data */
+#if 0
if (!memcmp(__io_virt(bar_mem), phys_to_virt(ep->dst), ep->size))
dev_info(&ep->pdev->dev, "DMA-Write test PASSED\n");
else
dev_info(&ep->pdev->dev, "DMA-Write test FAILED\n");
+#endif
err_out:
iounmap(bar_mem);
err_remap:
@@ -596,11 +598,12 @@
}
/* compare copied data */
+#if 0
if (!memcmp(__io_virt(bar_mem), phys_to_virt(ep->src), ep->size))
dev_info(&ep->pdev->dev, "DMA-Read test PASSED\n");
else
dev_info(&ep->pdev->dev, "DMA-Read test FAILED\n");
-
+#endif
err_out:
iounmap(bar_mem);
err_remap:
diff --git a/drivers/pci/dwc/pcie-tegra.c b/drivers/pci/dwc/pcie-tegra.c
index ed9d1ef..2a3672b 100644
--- a/drivers/pci/dwc/pcie-tegra.c
+++ b/drivers/pci/dwc/pcie-tegra.c
@@ -1317,11 +1317,12 @@
}
/* compare copied data */
+#if 0
if (!memcmp(pcie->cpu_virt_addr, dst_cpu_virt, pcie->size))
dev_info(pcie->dev, "DMA-Write test PASSED\n");
else
dev_info(pcie->dev, "DMA-Write test FAILED\n");
-
+#endif
err_out:
iounmap(dst_cpu_virt);
return ret;
@@ -1432,11 +1433,12 @@
}
/* compare copied data */
+#if 0
if (!memcmp(dst_cpu_virt, pcie->cpu_virt_addr, pcie->size))
dev_info(pcie->dev, "DMA-Read test PASSED\n");
else
dev_info(pcie->dev, "DMA-Read test FAILED\n");
-
+#endif
err_out:
iounmap(dst_cpu_virt);
return ret;
diff --git a/drivers/pci/endpoint/functions/pci-epf-nv-test.c b/drivers/pci/endpoint/functions/pci-epf-nv-test.c
index 8b2a1dc..f4132a7 100644
--- a/drivers/pci/endpoint/functions/pci-epf-nv-test.c
+++ b/drivers/pci/endpoint/functions/pci-epf-nv-test.c
@@ -16,7 +16,7 @@
#include <linux/pci-epc.h>
#include <linux/pci-epf.h>
-#define BAR0_SIZE SZ_64K
+#define BAR0_SIZE SZ_512M
struct pci_epf_nv_test {
struct pci_epf_header header;
@@ -30,14 +30,11 @@
struct pci_epf_nv_test *epfnv = epf_get_drvdata(epf);
struct pci_epc *epc = epf->epc;
struct device *cdev = epc->dev.parent;
- struct iommu_domain *domain = iommu_get_domain_for_dev(cdev);
pci_epc_stop(epc);
pci_epc_clear_bar(epc, BAR_0);
- vunmap(epfnv->bar0_ram_map);
- iommu_unmap(domain, epfnv->bar0_iova, PAGE_SIZE);
- iommu_dma_free_iova(cdev, epfnv->bar0_iova, BAR0_SIZE);
- __free_pages(epfnv->bar0_ram_page, 1);
+ dma_free_coherent(cdev, BAR0_SIZE, epfnv->bar0_ram_map,
+ epfnv->bar0_iova);
}
static int pci_epf_nv_test_bind(struct pci_epf *epf)
@@ -47,7 +44,6 @@
struct pci_epf_header *header = epf->header;
struct device *fdev = &epf->dev;
struct device *cdev = epc->dev.parent;
- struct iommu_domain *domain = iommu_get_domain_for_dev(cdev);
int ret;
ret = pci_epc_write_header(epc, header);
@@ -56,60 +52,29 @@
return ret;
}
- epfnv->bar0_ram_page = alloc_pages(GFP_KERNEL, 1);
- if (!epfnv->bar0_ram_page) {
- dev_err(fdev, "alloc_pages() failed\n");
- ret = -ENOMEM;
- goto fail;
- }
- dev_info(fdev, "BAR0 RAM phys: 0x%llx\n",
- page_to_phys(epfnv->bar0_ram_page));
-
- epfnv->bar0_iova = iommu_dma_alloc_iova(cdev, BAR0_SIZE,
- cdev->coherent_dma_mask);
- if (!epfnv->bar0_iova) {
- dev_err(fdev, "iommu_dma_alloc_iova() failed\n");
- ret = -ENOMEM;
- goto fail_free_pages;
- }
-
- dev_info(fdev, "BAR0 RAM IOVA: 0x%08llx\n", epfnv->bar0_iova);
-
- ret = iommu_map(domain, epfnv->bar0_iova,
- page_to_phys(epfnv->bar0_ram_page),
- PAGE_SIZE, IOMMU_READ | IOMMU_WRITE);
- if (ret) {
- dev_err(fdev, "iommu_map(RAM) failed: %d\n", ret);
- goto fail_free_iova;
- }
- epfnv->bar0_ram_map = vmap(&epfnv->bar0_ram_page, 1, VM_MAP,
- PAGE_KERNEL);
+ epfnv->bar0_ram_map = dma_alloc_coherent(cdev, BAR0_SIZE,
+ &epfnv->bar0_iova, GFP_KERNEL);
if (!epfnv->bar0_ram_map) {
- dev_err(fdev, "vmap() failed\n");
+ dev_err(fdev, "dma_alloc_coherent() failed\n");
ret = -ENOMEM;
- goto fail_unmap_ram_iova;
+ return ret;
}
- dev_info(fdev, "BAR0 RAM virt: 0x%p\n", epfnv->bar0_ram_map);
+ dev_info(fdev, "BAR0 RAM IOVA: 0x%08llx\n", epfnv->bar0_iova);
ret = pci_epc_set_bar(epc, BAR_0, epfnv->bar0_iova, BAR0_SIZE,
PCI_BASE_ADDRESS_SPACE_MEMORY |
PCI_BASE_ADDRESS_MEM_TYPE_32);
if (ret) {
dev_err(fdev, "pci_epc_set_bar() failed: %d\n", ret);
- goto fail_unmap_ram_virt;
+ goto fail_set_bar;
+ return ret;
}
return 0;
-fail_unmap_ram_virt:
- vunmap(epfnv->bar0_ram_map);
-fail_unmap_ram_iova:
- iommu_unmap(domain, epfnv->bar0_iova, PAGE_SIZE);
-fail_free_iova:
- iommu_dma_free_iova(cdev, epfnv->bar0_iova, BAR0_SIZE);
-fail_free_pages:
- __free_pages(epfnv->bar0_ram_page, 1);
-fail:
+fail_set_bar:
+ dma_free_coherent(cdev, BAR0_SIZE, epfnv->bar0_ram_map,
+ epfnv->bar0_iova);
return ret;
}
patch 4
Subject: [PATCH] DNI: dts: t19x: Enable DMA polling
Enable polling mechanism for DMA read/write operations instead of
interrupt mechanism for accurate perf measurements
---
diff --git a/kernel-dts/tegra194-soc/tegra194-soc-pcie.dtsi b/kernel-dts/tegra194-soc/tegra194-soc-pcie.dtsi
index d052446..00ca791 100644
--- a/kernel-dts/tegra194-soc/tegra194-soc-pcie.dtsi
+++ b/kernel-dts/tegra194-soc/tegra194-soc-pcie.dtsi
@@ -880,6 +880,7 @@
204000000 408000000 666000000 1066000000
408000000 666000000 1066000000 2133000000 >;
+ nvidia,dma-poll;
nvidia,max-speed = <4>;
nvidia,disable-aspm-states = <0xf>;
nvidia,controller-id = <&bpmp 0x5>;
And use below steps to start a test.
Go to the console of the system where PCIe IP is operating as endpoint and execute the following commands
cd /sys/kernel/config/pci_ep/
mkdir functions/pci_epf_nv_test/func1
echo 0x1AD5 > functions/pci_epf_nv_test/func1/deviceid
echo 16 > functions/pci_epf_nv_test/func1/msi_interrupts
ln -s functions/pci_epf_nv_test/func1 controllers/141a0000.pcie_ep/
echo 1 > controllers/141a0000.pcie_ep/start
NOTE:- Boot the host system only after executing all the above commands.
As soon as system boots, execute following commands to prepare the system for perf checkout
Execute following command to enable perf mode w.r.t ASPM
echo "performance" > /sys/module/pcie_aspm/parameters/policy
start the test
RP Mode DMA
In the below procedure, x being the number of the root port controller whose DMA is being used for perf checkout
Write
Go to the debugfs directory of the root port controller
cd /sys/kernel/debug/pcie-x/
Set channel number (set it to one of 0,1,2,3)
echo 1 > channel
Set size to 512 MB
echo 0x20000000 > size
Set source address for DMA.
For this, grep for the string “---> Allocated memory for DMA” in dmesg log and use whatever address comes up in the grep output
dmesg | grep " \-\-\-> Allocated memory for DMA"
example output would be something like
[ 7.102149] tegra-pcie-dw 141a0000.pcie: ---> Allocated memory for DMA @ 0xA0000000
So, use 0xA0000000 as the source address
echo 0xA0000000 > src
Note: - don’t forget to replace 0xA0000000 with your grep output value. In case it is not found in grep output, save full kernel boot log and search in it
Set destination address for DMA
For this, execute the following command
lspci -vv | grep -i "region 0"
an example output would be something like
Region 0: Memory at 1f40000000 (32-bit, non-prefetchable)
So, use 1f40000000 as destination address
echo 0x1f40000000 > dst
Note: - don’t forget to replace 0x1f40000000 with your grep output value. In case it is not found in grep output, save full kernel boot log and search in it
Execute write test
cat write
It prints the output in the following format
tegra-pcie-dw 14100000.pcie_c1_rp: DMA write. Size: 536870912 bytes, Time diff: 316519776 ns
Perf calculation:
Perf = (Size * 8 * 1000000000)/(Time diff * 1024 * 1024 * 1024) Gbps
Read test can be performed by interchanging 'src' and 'dst' and executing the 'cat read' command.
EP Mode DMA
Write
Go to the debugfs directory of the end point client driver
cd /sys/kernel/debug/tegra_pcie_ep/
Set channel number (set it to one of 0,1,2,3)
echo 1 > channel
Set size to 512 MB
echo 0x20000000 > size
Set source address for EP’s DMA.
For this, grep for the string "BAR0 RAM IOVA" in the dmesg log of the endpoint system console and use whatever address comes up in the grep output
dmesg | grep "BAR0 RAM IOVA"
an example output would be something like
pci_epf_nv_test pci_epf_nv_test.0: BAR0 RAM IOVA: 0xe0000000
So, use 0xe0000000 as source address
echo 0xe0000000 > src
Note: - don’t forget to replace 0xe0000000 with your grep output value. In case it is not found in grep output, save full kernel boot log and search in it
Set destination address for DMA
For this, grep for the string “Allocated memory for DMA operation” in dmesg log of host system console (i.e. current system) and use whatever address comes up in the grep output
dmesg | grep " Allocated memory for DMA operation"
an example output would be something like
tegra_ep_mem 0005:01:00.0: Allocated memory for DMA operation @ 0xC0000000, size=0x20000000
So, use 0xC0000000 as destination address
echo 0xC0000000 > dst
Note: - don’t forget to replace 0xC0000000 with your grep output value. In case it is not found in grep output, save full kernel boot log and search in it
Execute write test
cat write
It prints the output in the following format
tegra_ep_mem 0000:01:00.0: DMA write: Size: 536870912 bytes, Time diff: 296565536 ns
Perf calculation:
Perf = (Size * 8 * 1000000000)/(Time diff * 1024 * 1024 * 1024) Gbps
Read test can be performed by interchanging 'src' and 'dst' and executing the 'cat read' command.