* [PATCH v1 0/2] Add AMD MDB Endpoint and non-LL mode Support
@ 2025-09-11 11:44 Devendra K Verma
2025-09-11 11:44 ` [PATCH v1 1/2] dmaengine: dw-edma: Add AMD MDB Endpoint Support Devendra K Verma
2025-09-11 11:44 ` [PATCH v1 2/2] dmaengine: dw-edma: Add non-LL mode Devendra K Verma
0 siblings, 2 replies; 9+ messages in thread
From: Devendra K Verma @ 2025-09-11 11:44 UTC (permalink / raw)
To: bhelgaas, mani, vkoul; +Cc: dmaengine, linux-pci, linux-kernel, michal.simek
This patch series adds support for the following:
- AMD MDB endpoint support; as part of this patch the following
  are added:
  o AMD supported device ID and vendor ID (Xilinx)
  o AMD MDB specific driver data
  o AMD specific VSEC capability to retrieve the physical base
    address of the MDB-side DDR
  o Logic to assign offsets to the LL and data blocks when more
    channels are enabled than are configured in the given
    pci_data struct
- Addition of non-LL mode
  o Non-LL mode functions supported by the IP
  o Flexibility for the client to choose non-LL mode via the
    dma_slave_config member peripheral_config
  o Allows the IP to be used even when LL mode is not available
Devendra K Verma (2):
dmaengine: dw-edma: Add AMD MDB Endpoint Support
dmaengine: dw-edma: Add non-LL mode
drivers/dma/dw-edma/dw-edma-core.c | 38 ++++++--
drivers/dma/dw-edma/dw-edma-core.h | 1 +
drivers/dma/dw-edma/dw-edma-pcie.c | 162 ++++++++++++++++++++++++++++++++--
drivers/dma/dw-edma/dw-hdma-v0-core.c | 62 ++++++++++++-
include/linux/dma/edma.h | 1 +
5 files changed, 251 insertions(+), 13 deletions(-)
--
1.8.3.1
* [PATCH v1 1/2] dmaengine: dw-edma: Add AMD MDB Endpoint Support
2025-09-11 11:44 [PATCH v1 0/2] Add AMD MDB Endpoint and non-LL mode Support Devendra K Verma
@ 2025-09-11 11:44 ` Devendra K Verma
2025-09-11 21:50 ` Bjorn Helgaas
2025-09-11 11:44 ` [PATCH v1 2/2] dmaengine: dw-edma: Add non-LL mode Devendra K Verma
1 sibling, 1 reply; 9+ messages in thread
From: Devendra K Verma @ 2025-09-11 11:44 UTC (permalink / raw)
To: bhelgaas, mani, vkoul; +Cc: dmaengine, linux-pci, linux-kernel, michal.simek
Add AMD MDB PCIe endpoint support. For the AMD-specific support the
following are added:
- AMD supported PCIe Device IDs and Vendor ID (Xilinx).
- AMD MDB specific driver data.
- AMD MDB specific VSEC capability to retrieve the device DDR
  base address.
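
For reference, a condensed, hypothetical sketch of the VSEC lookup this
patch adds (the helper name is made up; the 0x20 VSEC ID and the
0x8/0xc dword offsets are taken from the diff below, and the lookup is
shown vendor-qualified as discussed later in this thread):

static u64 amd_mdb_read_devmem_base(struct pci_dev *pdev)
{
	u32 lo, hi;
	u16 vsec;

	/* Xilinx MDB VSEC ID, as defined in this patch */
	vsec = pci_find_vsec_capability(pdev, PCI_VENDOR_ID_XILINX, 0x20);
	if (!vsec)
		return ~0ULL;	/* treated as "no valid address" */

	pci_read_config_dword(pdev, vsec + 0x8, &lo);	/* lower 32 bits */
	pci_read_config_dword(pdev, vsec + 0xc, &hi);	/* upper 32 bits */

	return ((u64)hi << 32) | lo;
}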
Signed-off-by: Devendra K Verma <devendra.verma@amd.com>
---
Changes in v1:
Removed the pci device id from pci_ids.h file.
Added the vendor id macro as per the suggested method.
Changed the type of the newly added devmem_phys_off variable.
Added logic to assign offsets for the LL and data region blocks
when more channels are enabled than are given in the
amd_mdb_data struct.
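
For illustration, the offset assignment lays blocks out back to back
with a 2 MiB stride plus the 2 KiB block size, i.e. 0x200800 between
consecutive blocks. A hypothetical BAR 2 layout for
wr_ch_cnt = rd_ch_cnt = 4 (values derived from the constants in
dw_edma_assign_chan_data() below) would be:

	ll_wr[0..3]: 0x0000000, 0x0200800, 0x0401000, 0x0601800
	ll_rd[0..3]: 0x0802000, 0x0a02800, 0x0c03000, 0x0e03800
	dt_wr[0..3]: 0x1004000, 0x1204800, 0x1405000, 0x1605800
	dt_rd[0..3]: 0x1806000, 0x1a06800, 0x1c07000, 0x1e07800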
---
drivers/dma/dw-edma/dw-edma-pcie.c | 132 ++++++++++++++++++++++++++++++++++++-
1 file changed, 131 insertions(+), 1 deletion(-)
diff --git a/drivers/dma/dw-edma/dw-edma-pcie.c b/drivers/dma/dw-edma/dw-edma-pcie.c
index 3371e0a7..48ecfce 100644
--- a/drivers/dma/dw-edma/dw-edma-pcie.c
+++ b/drivers/dma/dw-edma/dw-edma-pcie.c
@@ -23,6 +23,11 @@
#define DW_PCIE_VSEC_DMA_WR_CH GENMASK(9, 0)
#define DW_PCIE_VSEC_DMA_RD_CH GENMASK(25, 16)
+/* AMD MDB specific defines */
+#define DW_PCIE_XILINX_MDB_VSEC_ID 0x20
+#define PCI_DEVICE_ID_AMD_MDB_B054 0xb054
+#define DW_PCIE_AMD_MDB_INVALID_ADDR (~0ULL)
+
#define DW_BLOCK(a, b, c) \
{ \
.bar = a, \
@@ -50,6 +55,7 @@ struct dw_edma_pcie_data {
u8 irqs;
u16 wr_ch_cnt;
u16 rd_ch_cnt;
+ pci_bus_addr_t devmem_phys_off;
};
static const struct dw_edma_pcie_data snps_edda_data = {
@@ -90,6 +96,89 @@ struct dw_edma_pcie_data {
.rd_ch_cnt = 2,
};
+static const struct dw_edma_pcie_data amd_mdb_data = {
+ /* MDB registers location */
+ .rg.bar = BAR_0,
+ .rg.off = 0x00001000, /* 4 Kbytes */
+ .rg.sz = 0x00002000, /* 8 Kbytes */
+ /* MDB memory linked list location */
+ .ll_wr = {
+ /* Channel 0 - BAR 2, offset 0 Mbytes, size 2 Kbytes */
+ DW_BLOCK(BAR_2, 0x00000000, 0x00000800)
+ /* Channel 1 - BAR 2, offset 2 Mbytes, size 2 Kbytes */
+ DW_BLOCK(BAR_2, 0x00200000, 0x00000800)
+ },
+ .ll_rd = {
+ /* Channel 0 - BAR 2, offset 4 Mbytes, size 2 Kbytes */
+ DW_BLOCK(BAR_2, 0x00400000, 0x00000800)
+ /* Channel 1 - BAR 2, offset 6 Mbytes, size 2 Kbytes */
+ DW_BLOCK(BAR_2, 0x00600000, 0x00000800)
+ },
+ /* MDB memory data location */
+ .dt_wr = {
+ /* Channel 0 - BAR 2, offset 8 Mbytes, size 2 Kbytes */
+ DW_BLOCK(BAR_2, 0x00800000, 0x00000800)
+ /* Channel 1 - BAR 2, offset 9 Mbytes, size 2 Kbytes */
+ DW_BLOCK(BAR_2, 0x00900000, 0x00000800)
+ },
+ .dt_rd = {
+ /* Channel 0 - BAR 2, offset 10 Mbytes, size 2 Kbytes */
+ DW_BLOCK(BAR_2, 0x00a00000, 0x00000800)
+ /* Channel 1 - BAR 2, offset 11 Mbytes, size 2 Kbytes */
+ DW_BLOCK(BAR_2, 0x00b00000, 0x00000800)
+ },
+ /* Other */
+ .mf = EDMA_MF_HDMA_NATIVE,
+ .irqs = 1,
+ .wr_ch_cnt = 2,
+ .rd_ch_cnt = 2,
+};
+
+static void dw_edma_assign_chan_data(struct dw_edma_pcie_data *pdata,
+ enum pci_barno bar)
+{
+ u16 i;
+ off_t off = 0, offsz = 0x200000;
+ size_t size = 0x800;
+ u16 wr_ch = pdata->wr_ch_cnt;
+ u16 rd_ch = pdata->rd_ch_cnt;
+
+ if (wr_ch <= 2 || rd_ch <= 2)
+ return;
+
+ /* Write channel LL region */
+ for (i = 0; i < wr_ch; i++) {
+ pdata->ll_wr[i].bar = bar;
+ pdata->ll_wr[i].off = off;
+ pdata->ll_wr[i].sz = size;
+ off += offsz + size;
+ }
+
+ /* Read channel LL region */
+ for (i = 0; i < rd_ch; i++) {
+ pdata->ll_rd[i].bar = bar;
+ pdata->ll_rd[i].off = off;
+ pdata->ll_rd[i].sz = size;
+ off += offsz + size;
+ }
+
+ /* Write channel data region */
+ for (i = 0; i < wr_ch; i++) {
+ pdata->dt_wr[i].bar = bar;
+ pdata->dt_wr[i].off = off;
+ pdata->dt_wr[i].sz = size;
+ off += offsz + size;
+ }
+
+ /* Read channel data region */
+ for (i = 0; i < rd_ch; i++) {
+ pdata->dt_rd[i].bar = bar;
+ pdata->dt_rd[i].off = off;
+ pdata->dt_rd[i].sz = size;
+ off += offsz + size;
+ }
+}
+
static int dw_edma_pcie_irq_vector(struct device *dev, unsigned int nr)
{
return pci_irq_vector(to_pci_dev(dev), nr);
@@ -121,7 +210,11 @@ static void dw_edma_pcie_get_vsec_dma_data(struct pci_dev *pdev,
u16 vsec;
u64 off;
- vsec = pci_find_vsec_capability(pdev, PCI_VENDOR_ID_SYNOPSYS,
+ /*
+ * Synopsys and AMD (Xilinx) use the same VSEC ID for the purpose
+ * of map, channel counts, etc.
+ */
+ vsec = pci_find_vsec_capability(pdev, pdev->vendor,
DW_PCIE_VSEC_DMA_ID);
if (!vsec)
return;
@@ -155,6 +248,24 @@ static void dw_edma_pcie_get_vsec_dma_data(struct pci_dev *pdev,
off <<= 32;
off |= val;
pdata->rg.off = off;
+
+ /* AMD specific VSEC capability */
+ vsec = pci_find_vsec_capability(pdev, pdev->vendor,
+ DW_PCIE_XILINX_MDB_VSEC_ID);
+ if (!vsec)
+ return;
+
+ pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER, &val);
+ if (PCI_VNDR_HEADER_ID(val) != 0x20 ||
+ PCI_VNDR_HEADER_REV(val) != 0x1)
+ return;
+
+ pci_read_config_dword(pdev, vsec + 0xc, &val);
+ off = val;
+ pci_read_config_dword(pdev, vsec + 0x8, &val);
+ off <<= 32;
+ off |= val;
+ pdata->devmem_phys_off = off;
}
static int dw_edma_pcie_probe(struct pci_dev *pdev,
@@ -179,6 +290,7 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev,
}
memcpy(vsec_data, pdata, sizeof(struct dw_edma_pcie_data));
+ vsec_data->devmem_phys_off = DW_PCIE_AMD_MDB_INVALID_ADDR;
/*
* Tries to find if exists a PCIe Vendor-Specific Extended Capability
@@ -186,6 +298,22 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev,
*/
dw_edma_pcie_get_vsec_dma_data(pdev, vsec_data);
+ if (pdev->vendor == PCI_VENDOR_ID_XILINX) {
+ /*
+ * There is no valid address found for the LL memory
+ * space on the device side.
+ */
+ if (vsec_data->devmem_phys_off == DW_PCIE_AMD_MDB_INVALID_ADDR)
+ return -EINVAL;
+
+ /*
+ * Configure the channel LL and data blocks if number of
+ * channels enabled in VSEC capability are more than the
+ * channels configured in amd_mdb_data.
+ */
+ dw_edma_assign_chan_data(vsec_data, BAR_2);
+ }
+
/* Mapping PCI BAR regions */
mask = BIT(vsec_data->rg.bar);
for (i = 0; i < vsec_data->wr_ch_cnt; i++) {
@@ -367,6 +495,8 @@ static void dw_edma_pcie_remove(struct pci_dev *pdev)
static const struct pci_device_id dw_edma_pcie_id_table[] = {
{ PCI_DEVICE_DATA(SYNOPSYS, EDDA, &snps_edda_data) },
+ { PCI_VDEVICE(XILINX, PCI_DEVICE_ID_AMD_MDB_B054),
+ (kernel_ulong_t)&amd_mdb_data },
{ }
};
MODULE_DEVICE_TABLE(pci, dw_edma_pcie_id_table);
--
1.8.3.1
* [PATCH v1 2/2] dmaengine: dw-edma: Add non-LL mode
2025-09-11 11:44 [PATCH v1 0/2] Add AMD MDB Endpoint and non-LL mode Support Devendra K Verma
2025-09-11 11:44 ` [PATCH v1 1/2] dmaengine: dw-edma: Add AMD MDB Endpoint Support Devendra K Verma
@ 2025-09-11 11:44 ` Devendra K Verma
2025-09-11 21:53 ` Bjorn Helgaas
1 sibling, 1 reply; 9+ messages in thread
From: Devendra K Verma @ 2025-09-11 11:44 UTC (permalink / raw)
To: bhelgaas, mani, vkoul; +Cc: dmaengine, linux-pci, linux-kernel, michal.simek
The AMD MDB IP supports Linked List (LL) mode as well as non-LL mode.
The current code has no mechanism to enable DMA transactions in
non-LL mode. The following two cases are added with this patch:
- When a valid physical base address is not configured via the
  Xilinx VSEC capability, the IP can still be used in non-LL mode.
  Non-LL mode then becomes the default for all DMA transactions on
  all DMA channels.
- When a valid physical base address is configured but the client
  wants to use non-LL mode for DMA transactions, that flexibility is
  provided via the peripheral_config member of dma_slave_config. In
  this case the channels can be individually configured in non-LL
  mode. This use case is desirable for a single DMA transfer of one
  chunk, as it saves the effort of preparing the linked list.
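
As a hedged, client-side sketch (not part of this patch; the
dmaengine calls are standard, the int-flag convention follows the
description above, and dst_bus_addr is an assumed placeholder),
selecting non-LL mode for a channel could look like:

	struct dma_slave_config cfg = {};
	int nollp = 1;	/* request non-LL (simple) mode */
	int ret;

	cfg.direction = DMA_MEM_TO_DEV;
	cfg.dst_addr = dst_bus_addr;
	cfg.peripheral_config = &nollp;
	cfg.peripheral_size = sizeof(nollp);	/* must be sizeof(int) */

	ret = dmaengine_slave_config(chan, &cfg);
	if (ret)
		return ret;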
Signed-off-by: Devendra K Verma <devendra.verma@amd.com>
---
Changes in v1:
Changed the function return type for dw_edma_get_phys_addr().
Corrected the typo raised in review.
---
drivers/dma/dw-edma/dw-edma-core.c | 38 ++++++++++++++++++---
drivers/dma/dw-edma/dw-edma-core.h | 1 +
drivers/dma/dw-edma/dw-edma-pcie.c | 36 +++++++++++++++-----
drivers/dma/dw-edma/dw-hdma-v0-core.c | 62 ++++++++++++++++++++++++++++++++++-
include/linux/dma/edma.h | 1 +
5 files changed, 123 insertions(+), 15 deletions(-)
diff --git a/drivers/dma/dw-edma/dw-edma-core.c b/drivers/dma/dw-edma/dw-edma-core.c
index b43255f..3283ac5 100644
--- a/drivers/dma/dw-edma/dw-edma-core.c
+++ b/drivers/dma/dw-edma/dw-edma-core.c
@@ -223,8 +223,28 @@ static int dw_edma_device_config(struct dma_chan *dchan,
struct dma_slave_config *config)
{
struct dw_edma_chan *chan = dchan2dw_edma_chan(dchan);
+ int nollp = 0;
+
+ if (WARN_ON(config->peripheral_config &&
+ config->peripheral_size != sizeof(int)))
+ return -EINVAL;
memcpy(&chan->config, config, sizeof(*config));
+
+ /*
+ * When there is no valid LLP base address available
+ * then the default DMA ops will use the non-LL mode.
+ * Cases where LL mode is enabled and client wants
+ * to use the non-LL mode then also client can do
+ * so via providing the peripheral_config param.
+ */
+ if (config->peripheral_config)
+ nollp = *(int *)config->peripheral_config;
+
+ chan->nollp = false;
+ if (chan->dw->chip->nollp || (!chan->dw->chip->nollp && nollp))
+ chan->nollp = true;
+
chan->configured = true;
return 0;
@@ -353,7 +373,7 @@ static void dw_edma_device_issue_pending(struct dma_chan *dchan)
struct dw_edma_chan *chan = dchan2dw_edma_chan(xfer->dchan);
enum dma_transfer_direction dir = xfer->direction;
struct scatterlist *sg = NULL;
- struct dw_edma_chunk *chunk;
+ struct dw_edma_chunk *chunk = NULL;
struct dw_edma_burst *burst;
struct dw_edma_desc *desc;
u64 src_addr, dst_addr;
@@ -419,9 +439,11 @@ static void dw_edma_device_issue_pending(struct dma_chan *dchan)
if (unlikely(!desc))
goto err_alloc;
- chunk = dw_edma_alloc_chunk(desc);
- if (unlikely(!chunk))
- goto err_alloc;
+ if (!chan->nollp) {
+ chunk = dw_edma_alloc_chunk(desc);
+ if (unlikely(!chunk))
+ goto err_alloc;
+ }
if (xfer->type == EDMA_XFER_INTERLEAVED) {
src_addr = xfer->xfer.il->src_start;
@@ -450,7 +472,13 @@ static void dw_edma_device_issue_pending(struct dma_chan *dchan)
if (xfer->type == EDMA_XFER_SCATTER_GATHER && !sg)
break;
- if (chunk->bursts_alloc == chan->ll_max) {
+ /*
+ * For non-LL mode, only a single burst can be handled
+ * in a single chunk unlike LL mode where multiple bursts
+ * can be configured in a single chunk.
+ */
+ if ((chunk && chunk->bursts_alloc == chan->ll_max) ||
+ chan->nollp) {
chunk = dw_edma_alloc_chunk(desc);
if (unlikely(!chunk))
goto err_alloc;
diff --git a/drivers/dma/dw-edma/dw-edma-core.h b/drivers/dma/dw-edma/dw-edma-core.h
index 71894b9..2a4ad45 100644
--- a/drivers/dma/dw-edma/dw-edma-core.h
+++ b/drivers/dma/dw-edma/dw-edma-core.h
@@ -86,6 +86,7 @@ struct dw_edma_chan {
u8 configured;
struct dma_slave_config config;
+ bool nollp;
};
struct dw_edma_irq {
diff --git a/drivers/dma/dw-edma/dw-edma-pcie.c b/drivers/dma/dw-edma/dw-edma-pcie.c
index 48ecfce..daf1fa7 100644
--- a/drivers/dma/dw-edma/dw-edma-pcie.c
+++ b/drivers/dma/dw-edma/dw-edma-pcie.c
@@ -268,6 +268,15 @@ static void dw_edma_pcie_get_vsec_dma_data(struct pci_dev *pdev,
pdata->devmem_phys_off = off;
}
+static pci_bus_addr_t dw_edma_get_phys_addr(struct pci_dev *pdev,
+ struct dw_edma_pcie_data *pdata,
+ enum pci_barno bar)
+{
+ if (pdev->vendor == PCI_VENDOR_ID_XILINX)
+ return pdata->devmem_phys_off;
+ return pci_bus_address(pdev, bar);
+}
+
static int dw_edma_pcie_probe(struct pci_dev *pdev,
const struct pci_device_id *pid)
{
@@ -277,6 +286,7 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev,
struct dw_edma_chip *chip;
int err, nr_irqs;
int i, mask;
+ bool nollp = false;
vsec_data = kmalloc(sizeof(*vsec_data), GFP_KERNEL);
if (!vsec_data)
@@ -301,17 +311,20 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev,
if (pdev->vendor == PCI_VENDOR_ID_XILINX) {
/*
* There is no valid address found for the LL memory
- * space on the device side.
+ * space on the device side. In the absence of LL base
+ * address use the non-LL mode or simple mode supported by
+ * the HDMA IP.
*/
if (vsec_data->devmem_phys_off == DW_PCIE_AMD_MDB_INVALID_ADDR)
- return -EINVAL;
+ nollp = true;
/*
* Configure the channel LL and data blocks if number of
* channels enabled in VSEC capability are more than the
* channels configured in amd_mdb_data.
*/
- dw_edma_assign_chan_data(vsec_data, BAR_2);
+ if (!nollp)
+ dw_edma_assign_chan_data(vsec_data, BAR_2);
}
/* Mapping PCI BAR regions */
@@ -359,6 +372,7 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev,
chip->mf = vsec_data->mf;
chip->nr_irqs = nr_irqs;
chip->ops = &dw_edma_pcie_plat_ops;
+ chip->nollp = nollp;
chip->ll_wr_cnt = vsec_data->wr_ch_cnt;
chip->ll_rd_cnt = vsec_data->rd_ch_cnt;
@@ -367,7 +381,7 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev,
if (!chip->reg_base)
return -ENOMEM;
- for (i = 0; i < chip->ll_wr_cnt; i++) {
+ for (i = 0; i < chip->ll_wr_cnt && !nollp; i++) {
struct dw_edma_region *ll_region = &chip->ll_region_wr[i];
struct dw_edma_region *dt_region = &chip->dt_region_wr[i];
struct dw_edma_block *ll_block = &vsec_data->ll_wr[i];
@@ -378,7 +392,8 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev,
return -ENOMEM;
ll_region->vaddr.io += ll_block->off;
- ll_region->paddr = pci_bus_address(pdev, ll_block->bar);
+ ll_region->paddr = dw_edma_get_phys_addr(pdev, vsec_data,
+ ll_block->bar);
ll_region->paddr += ll_block->off;
ll_region->sz = ll_block->sz;
@@ -387,12 +402,13 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev,
return -ENOMEM;
dt_region->vaddr.io += dt_block->off;
- dt_region->paddr = pci_bus_address(pdev, dt_block->bar);
+ dt_region->paddr = dw_edma_get_phys_addr(pdev, vsec_data,
+ dt_block->bar);
dt_region->paddr += dt_block->off;
dt_region->sz = dt_block->sz;
}
- for (i = 0; i < chip->ll_rd_cnt; i++) {
+ for (i = 0; i < chip->ll_rd_cnt && !nollp; i++) {
struct dw_edma_region *ll_region = &chip->ll_region_rd[i];
struct dw_edma_region *dt_region = &chip->dt_region_rd[i];
struct dw_edma_block *ll_block = &vsec_data->ll_rd[i];
@@ -403,7 +419,8 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev,
return -ENOMEM;
ll_region->vaddr.io += ll_block->off;
- ll_region->paddr = pci_bus_address(pdev, ll_block->bar);
+ ll_region->paddr = dw_edma_get_phys_addr(pdev, vsec_data,
+ ll_block->bar);
ll_region->paddr += ll_block->off;
ll_region->sz = ll_block->sz;
@@ -412,7 +429,8 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev,
return -ENOMEM;
dt_region->vaddr.io += dt_block->off;
- dt_region->paddr = pci_bus_address(pdev, dt_block->bar);
+ dt_region->paddr = dw_edma_get_phys_addr(pdev, vsec_data,
+ dt_block->bar);
dt_region->paddr += dt_block->off;
dt_region->sz = dt_block->sz;
}
diff --git a/drivers/dma/dw-edma/dw-hdma-v0-core.c b/drivers/dma/dw-edma/dw-hdma-v0-core.c
index e3f8db4..befb9e0 100644
--- a/drivers/dma/dw-edma/dw-hdma-v0-core.c
+++ b/drivers/dma/dw-edma/dw-hdma-v0-core.c
@@ -225,7 +225,7 @@ static void dw_hdma_v0_sync_ll_data(struct dw_edma_chunk *chunk)
readl(chunk->ll_region.vaddr.io);
}
-static void dw_hdma_v0_core_start(struct dw_edma_chunk *chunk, bool first)
+static void dw_hdma_v0_core_ll_start(struct dw_edma_chunk *chunk, bool first)
{
struct dw_edma_chan *chan = chunk->chan;
struct dw_edma *dw = chan->dw;
@@ -263,6 +263,66 @@ static void dw_hdma_v0_core_start(struct dw_edma_chunk *chunk, bool first)
SET_CH_32(dw, chan->dir, chan->id, doorbell, HDMA_V0_DOORBELL_START);
}
+static void dw_hdma_v0_core_non_ll_start(struct dw_edma_chunk *chunk)
+{
+ struct dw_edma_chan *chan = chunk->chan;
+ struct dw_edma *dw = chan->dw;
+ struct dw_edma_burst *child;
+ u32 val;
+
+ list_for_each_entry(child, &chunk->burst->list, list) {
+ SET_CH_32(dw, chan->dir, chan->id, ch_en, BIT(0));
+
+ /* Source address */
+ SET_CH_32(dw, chan->dir, chan->id, sar.lsb,
+ lower_32_bits(child->sar));
+ SET_CH_32(dw, chan->dir, chan->id, sar.msb,
+ upper_32_bits(child->sar));
+
+ /* Destination address */
+ SET_CH_32(dw, chan->dir, chan->id, dar.lsb,
+ lower_32_bits(child->dar));
+ SET_CH_32(dw, chan->dir, chan->id, dar.msb,
+ upper_32_bits(child->dar));
+
+ /* Transfer size */
+ SET_CH_32(dw, chan->dir, chan->id, transfer_size, child->sz);
+
+ /* Interrupt setup */
+ val = GET_CH_32(dw, chan->dir, chan->id, int_setup) |
+ HDMA_V0_STOP_INT_MASK |
+ HDMA_V0_ABORT_INT_MASK |
+ HDMA_V0_LOCAL_STOP_INT_EN |
+ HDMA_V0_LOCAL_ABORT_INT_EN;
+
+ if (!(dw->chip->flags & DW_EDMA_CHIP_LOCAL)) {
+ val |= HDMA_V0_REMOTE_STOP_INT_EN |
+ HDMA_V0_REMOTE_ABORT_INT_EN;
+ }
+
+ SET_CH_32(dw, chan->dir, chan->id, int_setup, val);
+
+ /* Channel control setup */
+ val = GET_CH_32(dw, chan->dir, chan->id, control1);
+ val &= ~HDMA_V0_LINKLIST_EN;
+ SET_CH_32(dw, chan->dir, chan->id, control1, val);
+
+ /* Ring the doorbell */
+ SET_CH_32(dw, chan->dir, chan->id, doorbell,
+ HDMA_V0_DOORBELL_START);
+ }
+}
+
+static void dw_hdma_v0_core_start(struct dw_edma_chunk *chunk, bool first)
+{
+ struct dw_edma_chan *chan = chunk->chan;
+
+ if (!chan->nollp)
+ dw_hdma_v0_core_ll_start(chunk, first);
+ else
+ dw_hdma_v0_core_non_ll_start(chunk);
+}
+
static void dw_hdma_v0_core_ch_config(struct dw_edma_chan *chan)
{
struct dw_edma *dw = chan->dw;
diff --git a/include/linux/dma/edma.h b/include/linux/dma/edma.h
index 3080747..e14e16f 100644
--- a/include/linux/dma/edma.h
+++ b/include/linux/dma/edma.h
@@ -99,6 +99,7 @@ struct dw_edma_chip {
enum dw_edma_map_format mf;
struct dw_edma *dw;
+ bool nollp;
};
/* Export to the platform drivers */
--
1.8.3.1
* Re: [PATCH v1 1/2] dmaengine: dw-edma: Add AMD MDB Endpoint Support
2025-09-11 11:44 ` [PATCH v1 1/2] dmaengine: dw-edma: Add AMD MDB Endpoint Support Devendra K Verma
@ 2025-09-11 21:50 ` Bjorn Helgaas
2025-09-16 10:34 ` Verma, Devendra
0 siblings, 1 reply; 9+ messages in thread
From: Bjorn Helgaas @ 2025-09-11 21:50 UTC (permalink / raw)
To: Devendra K Verma
Cc: bhelgaas, mani, vkoul, dmaengine, linux-pci, linux-kernel,
michal.simek
On Thu, Sep 11, 2025 at 05:14:50PM +0530, Devendra K Verma wrote:
> Add AMD MDB PCIe endpoint support. For the AMD-specific support the
> following are added:
> - AMD supported PCIe Device IDs and Vendor ID (Xilinx).
> - AMD MDB specific driver data.
> - AMD MDB specific VSEC capability to retrieve the device DDR
>   base address.
>
> Signed-off-by: Devendra K Verma <devendra.verma@amd.com>
> ---
> Changes in v1:
> Removed the pci device id from pci_ids.h file.
> Added the vendor id macro as per the suggested method.
> Changed the type of the newly added devmem_phys_off variable.
> Added logic to assign offsets for the LL and data region blocks
> when more channels are enabled than are given in the
> amd_mdb_data struct.
> ---
> drivers/dma/dw-edma/dw-edma-pcie.c | 132 ++++++++++++++++++++++++++++++++++++-
> 1 file changed, 131 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/dma/dw-edma/dw-edma-pcie.c b/drivers/dma/dw-edma/dw-edma-pcie.c
> index 3371e0a7..48ecfce 100644
> --- a/drivers/dma/dw-edma/dw-edma-pcie.c
> +++ b/drivers/dma/dw-edma/dw-edma-pcie.c
> @@ -23,6 +23,11 @@
> #define DW_PCIE_VSEC_DMA_WR_CH GENMASK(9, 0)
> #define DW_PCIE_VSEC_DMA_RD_CH GENMASK(25, 16)
>
> +/* AMD MDB specific defines */
> +#define DW_PCIE_XILINX_MDB_VSEC_ID 0x20
> +#define PCI_DEVICE_ID_AMD_MDB_B054 0xb054
> +#define DW_PCIE_AMD_MDB_INVALID_ADDR (~0ULL)
> +
> #define DW_BLOCK(a, b, c) \
> { \
> .bar = a, \
> @@ -50,6 +55,7 @@ struct dw_edma_pcie_data {
> u8 irqs;
> u16 wr_ch_cnt;
> u16 rd_ch_cnt;
> + pci_bus_addr_t devmem_phys_off;
Based on your previous response, I don't think pci_bus_addr_t is the
right type. IIUC devmem_phys_off is not an address that a PCIe
analyzer would ever see in a TLP. It sounds like it's the result of
applying an iATU translation to a PCI bus address.
I'm not sure there's a special type for that, so u64 might be as good
as anything.
> };
>
> static const struct dw_edma_pcie_data snps_edda_data = {
> @@ -90,6 +96,89 @@ struct dw_edma_pcie_data {
> .rd_ch_cnt = 2,
> };
>
> +static const struct dw_edma_pcie_data amd_mdb_data = {
> + /* MDB registers location */
> + .rg.bar = BAR_0,
> + .rg.off = 0x00001000, /* 4 Kbytes */
> + .rg.sz = 0x00002000, /* 8 Kbytes */
> + /* MDB memory linked list location */
> + .ll_wr = {
> + /* Channel 0 - BAR 2, offset 0 Mbytes, size 2 Kbytes */
> + DW_BLOCK(BAR_2, 0x00000000, 0x00000800)
> + /* Channel 1 - BAR 2, offset 2 Mbytes, size 2 Kbytes */
> + DW_BLOCK(BAR_2, 0x00200000, 0x00000800)
> + },
> + .ll_rd = {
> + /* Channel 0 - BAR 2, offset 4 Mbytes, size 2 Kbytes */
> + DW_BLOCK(BAR_2, 0x00400000, 0x00000800)
> + /* Channel 1 - BAR 2, offset 6 Mbytes, size 2 Kbytes */
> + DW_BLOCK(BAR_2, 0x00600000, 0x00000800)
> + },
> + /* MDB memory data location */
> + .dt_wr = {
> + /* Channel 0 - BAR 2, offset 8 Mbytes, size 2 Kbytes */
> + DW_BLOCK(BAR_2, 0x00800000, 0x00000800)
> + /* Channel 1 - BAR 2, offset 9 Mbytes, size 2 Kbytes */
> + DW_BLOCK(BAR_2, 0x00900000, 0x00000800)
> + },
> + .dt_rd = {
> + /* Channel 0 - BAR 2, offset 10 Mbytes, size 2 Kbytes */
> + DW_BLOCK(BAR_2, 0x00a00000, 0x00000800)
> + /* Channel 1 - BAR 2, offset 11 Mbytes, size 2 Kbytes */
> + DW_BLOCK(BAR_2, 0x00b00000, 0x00000800)
> + },
> + /* Other */
> + .mf = EDMA_MF_HDMA_NATIVE,
> + .irqs = 1,
> + .wr_ch_cnt = 2,
> + .rd_ch_cnt = 2,
> +};
> +
> +static void dw_edma_assign_chan_data(struct dw_edma_pcie_data *pdata,
> + enum pci_barno bar)
> +{
> + u16 i;
> + off_t off = 0, offsz = 0x200000;
> + size_t size = 0x800;
> + u16 wr_ch = pdata->wr_ch_cnt;
> + u16 rd_ch = pdata->rd_ch_cnt;
> +
> + if (wr_ch <= 2 || rd_ch <= 2)
> + return;
> +
> + /* Write channel LL region */
> + for (i = 0; i < wr_ch; i++) {
> + pdata->ll_wr[i].bar = bar;
> + pdata->ll_wr[i].off = off;
> + pdata->ll_wr[i].sz = size;
> + off += offsz + size;
> + }
> +
> + /* Read channel LL region */
> + for (i = 0; i < rd_ch; i++) {
> + pdata->ll_rd[i].bar = bar;
> + pdata->ll_rd[i].off = off;
> + pdata->ll_rd[i].sz = size;
> + off += offsz + size;
> + }
> +
> + /* Write channel data region */
> + for (i = 0; i < wr_ch; i++) {
> + pdata->dt_wr[i].bar = bar;
> + pdata->dt_wr[i].off = off;
> + pdata->dt_wr[i].sz = size;
> + off += offsz + size;
> + }
> +
> + /* Read channel data region */
> + for (i = 0; i < rd_ch; i++) {
> + pdata->dt_rd[i].bar = bar;
> + pdata->dt_rd[i].off = off;
> + pdata->dt_rd[i].sz = size;
> + off += offsz + size;
> + }
> +}
> +
> static int dw_edma_pcie_irq_vector(struct device *dev, unsigned int nr)
> {
> return pci_irq_vector(to_pci_dev(dev), nr);
> @@ -121,7 +210,11 @@ static void dw_edma_pcie_get_vsec_dma_data(struct pci_dev *pdev,
> u16 vsec;
> u64 off;
>
> - vsec = pci_find_vsec_capability(pdev, PCI_VENDOR_ID_SYNOPSYS,
> + /*
> + * Synopsys and AMD (Xilinx) use the same VSEC ID for the purpose
> + * of map, channel counts, etc.
> + */
> + vsec = pci_find_vsec_capability(pdev, pdev->vendor,
> DW_PCIE_VSEC_DMA_ID);
You must validate pdev->vendor first. Passing pdev->vendor means this
will find any VSEC with ID 0x6 on any device at all. For example, you
could find a VSEC with ID 0x6 on an Intel device where VSEC ID 0x6
means something completely different.
You have to know what the Vendor ID is before calling
pci_find_vsec_capability() because otherwise you don't know what the
VSEC ID means.
The best way to do this would be to use a separate #define for each
vendor to remove the assumption that each vendor uses the same ID:
#define DW_PCIE_SYNOPSYS_VSEC_DMA_ID 0x6
#define DW_PCIE_XILINX_VSEC_DMA_ID 0x6
vsec = pci_find_vsec_capability(pdev, PCI_VENDOR_ID_SYNOPSYS,
DW_PCIE_SYNOPSYS_VSEC_DMA_ID);
if (!vsec) {
vsec = pci_find_vsec_capability(pdev, PCI_VENDOR_ID_XILINX,
DW_PCIE_XILINX_VSEC_DMA_ID);
if (!vsec)
return;
}
That way it's clear that this only applies to Synopsys devices and
Xilinx devices, Synopsys and Xilinx implemented a VSEC with the same
semantics, and it's just a coincidence that they assigned the same
VSEC ID.
> if (!vsec)
> return;
> @@ -155,6 +248,24 @@ static void dw_edma_pcie_get_vsec_dma_data(struct pci_dev *pdev,
> off <<= 32;
> off |= val;
> pdata->rg.off = off;
> +
> + /* AMD specific VSEC capability */
> + vsec = pci_find_vsec_capability(pdev, pdev->vendor,
> + DW_PCIE_XILINX_MDB_VSEC_ID);
Same here. This will find a VSEC with ID 0x20 on any device from any
vendor at all. But you only know what 0x20 means on Xilinx devices,
so this should be:
vsec = pci_find_vsec_capability(pdev, PCI_VENDOR_ID_XILINX,
DW_PCIE_XILINX_MDB_VSEC_ID);
> + if (!vsec)
> + return;
> +
> + pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER, &val);
> + if (PCI_VNDR_HEADER_ID(val) != 0x20 ||
> + PCI_VNDR_HEADER_REV(val) != 0x1)
> + return;
> +
> + pci_read_config_dword(pdev, vsec + 0xc, &val);
> + off = val;
> + pci_read_config_dword(pdev, vsec + 0x8, &val);
> + off <<= 32;
> + off |= val;
> + pdata->devmem_phys_off = off;
> }
>
> static int dw_edma_pcie_probe(struct pci_dev *pdev,
> @@ -179,6 +290,7 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev,
> }
>
> memcpy(vsec_data, pdata, sizeof(struct dw_edma_pcie_data));
> + vsec_data->devmem_phys_off = DW_PCIE_AMD_MDB_INVALID_ADDR;
>
> /*
> * Tries to find if exists a PCIe Vendor-Specific Extended Capability
> @@ -186,6 +298,22 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev,
> */
> dw_edma_pcie_get_vsec_dma_data(pdev, vsec_data);
>
> + if (pdev->vendor == PCI_VENDOR_ID_XILINX) {
> + /*
> + * There is no valid address found for the LL memory
> + * space on the device side.
> + */
> + if (vsec_data->devmem_phys_off == DW_PCIE_AMD_MDB_INVALID_ADDR)
> + return -EINVAL;
> +
> + /*
> + * Configure the channel LL and data blocks if number of
> + * channels enabled in VSEC capability are more than the
> + * channels configured in amd_mdb_data.
> + */
> + dw_edma_assign_chan_data(vsec_data, BAR_2);
> + }
> +
> /* Mapping PCI BAR regions */
> mask = BIT(vsec_data->rg.bar);
> for (i = 0; i < vsec_data->wr_ch_cnt; i++) {
> @@ -367,6 +495,8 @@ static void dw_edma_pcie_remove(struct pci_dev *pdev)
>
> static const struct pci_device_id dw_edma_pcie_id_table[] = {
> { PCI_DEVICE_DATA(SYNOPSYS, EDDA, &snps_edda_data) },
> + { PCI_VDEVICE(XILINX, PCI_DEVICE_ID_AMD_MDB_B054),
> + (kernel_ulong_t)&amd_mdb_data },
> { }
> };
> MODULE_DEVICE_TABLE(pci, dw_edma_pcie_id_table);
> --
> 1.8.3.1
>
* Re: [PATCH v1 2/2] dmaengine: dw-edma: Add non-LL mode
2025-09-11 11:44 ` [PATCH v1 2/2] dmaengine: dw-edma: Add non-LL mode Devendra K Verma
@ 2025-09-11 21:53 ` Bjorn Helgaas
2025-09-12 9:35 ` Verma, Devendra
0 siblings, 1 reply; 9+ messages in thread
From: Bjorn Helgaas @ 2025-09-11 21:53 UTC (permalink / raw)
To: Devendra K Verma
Cc: bhelgaas, mani, vkoul, dmaengine, linux-pci, linux-kernel,
michal.simek
On Thu, Sep 11, 2025 at 05:14:51PM +0530, Devendra K Verma wrote:
> The AMD MDB IP supports Linked List (LL) mode as well as non-LL mode.
> The current code has no mechanism to enable DMA transactions in
> non-LL mode. The following two cases are added with this patch:
> - When a valid physical base address is not configured via the
>   Xilinx VSEC capability, the IP can still be used in non-LL mode.
>   Non-LL mode then becomes the default for all DMA transactions on
>   all DMA channels.
> - When a valid physical base address is configured but the client
>   wants to use non-LL mode for DMA transactions, that flexibility is
>   provided via the peripheral_config member of dma_slave_config. In
>   this case the channels can be individually configured in non-LL
>   mode. This use case is desirable for a single DMA transfer of one
>   chunk, as it saves the effort of preparing the linked list.
> +static pci_bus_addr_t dw_edma_get_phys_addr(struct pci_dev *pdev,
> + struct dw_edma_pcie_data *pdata,
> + enum pci_barno bar)
> +{
> + if (pdev->vendor == PCI_VENDOR_ID_XILINX)
> + return pdata->devmem_phys_off;
> + return pci_bus_address(pdev, bar);
Does this imply that non-Xilinx devices don't have the iATU that
translates a PCI bus address to an internal device address?
> +}
* RE: [PATCH v1 2/2] dmaengine: dw-edma: Add non-LL mode
2025-09-11 21:53 ` Bjorn Helgaas
@ 2025-09-12 9:35 ` Verma, Devendra
2025-09-12 15:33 ` Bjorn Helgaas
0 siblings, 1 reply; 9+ messages in thread
From: Verma, Devendra @ 2025-09-12 9:35 UTC (permalink / raw)
To: Bjorn Helgaas
Cc: bhelgaas@google.com, mani@kernel.org, vkoul@kernel.org,
dmaengine@vger.kernel.org, linux-pci@vger.kernel.org,
linux-kernel@vger.kernel.org, Simek, Michal
Hi Bjorn
Please check my comments inline.
Regards,
Devendra
> -----Original Message-----
> From: Bjorn Helgaas <helgaas@kernel.org>
> Sent: Friday, September 12, 2025 03:24
> To: Verma, Devendra <Devendra.Verma@amd.com>
> Cc: bhelgaas@google.com; mani@kernel.org; vkoul@kernel.org;
> dmaengine@vger.kernel.org; linux-pci@vger.kernel.org; linux-
> kernel@vger.kernel.org; Simek, Michal <michal.simek@amd.com>
> Subject: Re: [PATCH v1 2/2] dmaengine: dw-edma: Add non-LL mode
>
> On Thu, Sep 11, 2025 at 05:14:51PM +0530, Devendra K Verma wrote:
> > The AMD MDB IP supports Linked List (LL) mode as well as non-LL mode.
> > The current code has no mechanism to enable DMA transactions in
> > non-LL mode. The following two cases are added with this patch:
> > - When a valid physical base address is not configured via the
> >   Xilinx VSEC capability, the IP can still be used in non-LL mode.
> >   Non-LL mode then becomes the default for all DMA transactions on
> >   all DMA channels.
> > - When a valid physical base address is configured but the client
> >   wants to use non-LL mode for DMA transactions, that flexibility is
> >   provided via the peripheral_config member of dma_slave_config. In
> >   this case the channels can be individually configured in non-LL
> >   mode. This use case is desirable for a single DMA transfer of one
> >   chunk, as it saves the effort of preparing the linked list.
>
> > +static pci_bus_addr_t dw_edma_get_phys_addr(struct pci_dev *pdev,
> > + struct dw_edma_pcie_data *pdata,
> > + enum pci_barno bar) {
> > + if (pdev->vendor == PCI_VENDOR_ID_XILINX)
> > + return pdata->devmem_phys_off;
> > + return pci_bus_address(pdev, bar);
>
> Does this imply that non-Xilinx devices don't have the iATU that translates a PCI
> bus address to an internal device address?
>
Non-Xilinx devices can have iATU enabled or bypassed as well. In bypass mode
no translation is done and the TLPs are simply forwarded untranslated.
> > +}
* Re: [PATCH v1 2/2] dmaengine: dw-edma: Add non-LL mode
2025-09-12 9:35 ` Verma, Devendra
@ 2025-09-12 15:33 ` Bjorn Helgaas
2025-09-15 10:54 ` Verma, Devendra
0 siblings, 1 reply; 9+ messages in thread
From: Bjorn Helgaas @ 2025-09-12 15:33 UTC (permalink / raw)
To: Verma, Devendra
Cc: bhelgaas@google.com, mani@kernel.org, vkoul@kernel.org,
dmaengine@vger.kernel.org, linux-pci@vger.kernel.org,
linux-kernel@vger.kernel.org, Simek, Michal
On Fri, Sep 12, 2025 at 09:35:56AM +0000, Verma, Devendra wrote:
> > -----Original Message-----
> > From: Bjorn Helgaas <helgaas@kernel.org>
[redundant headers removed]
> > On Thu, Sep 11, 2025 at 05:14:51PM +0530, Devendra K Verma wrote:
> > > The AMD MDB IP supports Linked List (LL) mode as well as non-LL mode.
> > > The current code has no mechanism to enable DMA transactions in
> > > non-LL mode. The following two cases are added with this patch:
> > > - When a valid physical base address is not configured via the
> > >   Xilinx VSEC capability, the IP can still be used in non-LL mode.
> > >   Non-LL mode then becomes the default for all DMA transactions on
> > >   all DMA channels.
> > > - When a valid physical base address is configured but the client
> > >   wants to use non-LL mode for DMA transactions, that flexibility is
> > >   provided via the peripheral_config member of dma_slave_config. In
> > >   this case the channels can be individually configured in non-LL
> > >   mode. This use case is desirable for a single DMA transfer of one
> > >   chunk, as it saves the effort of preparing the linked list.
> >
> > > +static pci_bus_addr_t dw_edma_get_phys_addr(struct pci_dev *pdev,
> > > + struct dw_edma_pcie_data *pdata,
> > > + enum pci_barno bar) {
> > > + if (pdev->vendor == PCI_VENDOR_ID_XILINX)
> > > + return pdata->devmem_phys_off;
> > > + return pci_bus_address(pdev, bar);
> >
> > Does this imply that non-Xilinx devices don't have the iATU that
> > translates a PCI bus address to an internal device address?
>
> Non-Xilinx devices can have iATU enabled or bypassed as well. In
> bypass mode no translation is done and the TLPs are simply forwarded
> untranslated.
What happens on a non-Xilinx device with iATU enabled? Does
pci_bus_address() return the correct address in that case?
I can't figure out what's different about Xilinx that requires this
special handling.
* RE: [PATCH v1 2/2] dmaengine: dw-edma: Add non-LL mode
2025-09-12 15:33 ` Bjorn Helgaas
@ 2025-09-15 10:54 ` Verma, Devendra
0 siblings, 0 replies; 9+ messages in thread
From: Verma, Devendra @ 2025-09-15 10:54 UTC (permalink / raw)
To: Bjorn Helgaas
Cc: bhelgaas@google.com, mani@kernel.org, vkoul@kernel.org,
dmaengine@vger.kernel.org, linux-pci@vger.kernel.org,
linux-kernel@vger.kernel.org, Simek, Michal
Hi Bjorn
Please check my response inline.
Regards,
Devendra
> -----Original Message-----
> From: Bjorn Helgaas <helgaas@kernel.org>
> Sent: Friday, September 12, 2025 21:04
> To: Verma, Devendra <Devendra.Verma@amd.com>
> Cc: bhelgaas@google.com; mani@kernel.org; vkoul@kernel.org;
> dmaengine@vger.kernel.org; linux-pci@vger.kernel.org; linux-
> kernel@vger.kernel.org; Simek, Michal <michal.simek@amd.com>
> Subject: Re: [PATCH v1 2/2] dmaengine: dw-edma: Add non-LL mode
>
> On Fri, Sep 12, 2025 at 09:35:56AM +0000, Verma, Devendra wrote:
> > > -----Original Message-----
> > > From: Bjorn Helgaas <helgaas@kernel.org>
>
> [redundant headers removed]
>
> > > On Thu, Sep 11, 2025 at 05:14:51PM +0530, Devendra K Verma wrote:
> > > > The AMD MDB IP supports Linked List (LL) mode as well as non-LL mode.
> > > > The current code has no mechanism to enable DMA transactions in
> > > > non-LL mode. The following two cases are added with this patch:
> > > > - When a valid physical base address is not configured via the
> > > >   Xilinx VSEC capability, the IP can still be used in non-LL mode.
> > > >   Non-LL mode then becomes the default for all DMA transactions on
> > > >   all DMA channels.
> > > > - When a valid physical base address is configured but the client
> > > >   wants to use non-LL mode for DMA transactions, that flexibility is
> > > >   provided via the peripheral_config member of dma_slave_config. In
> > > >   this case the channels can be individually configured in non-LL
> > > >   mode. This use case is desirable for a single DMA transfer of one
> > > >   chunk, as it saves the effort of preparing the linked list.
> > >
> > > > +static pci_bus_addr_t dw_edma_get_phys_addr(struct pci_dev *pdev,
> > > > + struct dw_edma_pcie_data *pdata,
> > > > + enum pci_barno bar) {
> > > > + if (pdev->vendor == PCI_VENDOR_ID_XILINX)
> > > > + return pdata->devmem_phys_off;
> > > > + return pci_bus_address(pdev, bar);
> > >
> > > Does this imply that non-Xilinx devices don't have the iATU that
> > > translates a PCI bus address to an internal device address?
> >
> > Non-Xilinx devices can have iATU enabled or bypassed as well. In
> > bypass mode no translation is done and the TLPs are simply forwarded
> > untranslated.
>
> What happens on a non-Xilinx device with iATU enabled? Does
> pci_bus_address() return the correct address in that case?
>
> I can't figure out what's different about Xilinx that requires this special handling.
I am not sure how a non-Xilinx device operates, but as mentioned earlier, there
is an option where translation may not be required. The AMD (Xilinx) design
enables the iATU; that is the reason the device-side DDR offset is retrieved
and programmed.
* RE: [PATCH v1 1/2] dmaengine: dw-edma: Add AMD MDB Endpoint Support
2025-09-11 21:50 ` Bjorn Helgaas
@ 2025-09-16 10:34 ` Verma, Devendra
0 siblings, 0 replies; 9+ messages in thread
From: Verma, Devendra @ 2025-09-16 10:34 UTC (permalink / raw)
To: Bjorn Helgaas
Cc: bhelgaas@google.com, mani@kernel.org, vkoul@kernel.org,
dmaengine@vger.kernel.org, linux-pci@vger.kernel.org,
linux-kernel@vger.kernel.org, Simek, Michal
Hi Bjorn
Thank you for the review; please check my comments inline.
Regards,
Devendra
> -----Original Message-----
> From: Bjorn Helgaas <helgaas@kernel.org>
> Sent: Friday, September 12, 2025 03:21
> To: Verma, Devendra <Devendra.Verma@amd.com>
> Cc: bhelgaas@google.com; mani@kernel.org; vkoul@kernel.org;
> dmaengine@vger.kernel.org; linux-pci@vger.kernel.org; linux-
> kernel@vger.kernel.org; Simek, Michal <michal.simek@amd.com>
> Subject: Re: [PATCH v1 1/2] dmaengine: dw-edma: Add AMD MDB Endpoint
> Support
>
> On Thu, Sep 11, 2025 at 05:14:50PM +0530, Devendra K Verma wrote:
> > Add AMD MDB PCIe endpoint support. For the AMD-specific support the
> > following are added:
> > - AMD supported PCIe Device IDs and Vendor ID (Xilinx).
> > - AMD MDB specific driver data.
> > - AMD MDB specific VSEC capability to retrieve the device DDR
> >   base address.
> >
> > Signed-off-by: Devendra K Verma <devendra.verma@amd.com>
> > ---
> > Changes in v1:
> > Removed the pci device id from pci_ids.h file.
> > Added the vendor id macro as per the suggested method.
> > Changed the type of the newly added devmem_phys_off variable.
> > Added logic to assign offsets for the LL and data region blocks
> > when more channels are enabled than are given in the
> > amd_mdb_data struct.
> > ---
> > drivers/dma/dw-edma/dw-edma-pcie.c | 132
> > ++++++++++++++++++++++++++++++++++++-
> > 1 file changed, 131 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/dma/dw-edma/dw-edma-pcie.c
> > b/drivers/dma/dw-edma/dw-edma-pcie.c
> > index 3371e0a7..48ecfce 100644
> > --- a/drivers/dma/dw-edma/dw-edma-pcie.c
> > +++ b/drivers/dma/dw-edma/dw-edma-pcie.c
> > @@ -23,6 +23,11 @@
> > #define DW_PCIE_VSEC_DMA_WR_CH GENMASK(9, 0)
> > #define DW_PCIE_VSEC_DMA_RD_CH GENMASK(25, 16)
> >
> > +/* AMD MDB specific defines */
> > +#define DW_PCIE_XILINX_MDB_VSEC_ID 0x20
> > +#define PCI_DEVICE_ID_AMD_MDB_B054 0xb054
> > +#define DW_PCIE_AMD_MDB_INVALID_ADDR (~0ULL)
> > +
> > #define DW_BLOCK(a, b, c) \
> > { \
> > .bar = a, \
> > @@ -50,6 +55,7 @@ struct dw_edma_pcie_data {
> > u8 irqs;
> > u16 wr_ch_cnt;
> > u16 rd_ch_cnt;
> > + pci_bus_addr_t devmem_phys_off;
>
> Based on your previous response, I don't think pci_bus_addr_t is the right type. IIUC
> devmem_phys_off is not an address that a PCIe analyzer would ever see in a TLP.
> It sounds like it's the result of applying an iATU translation to a PCI bus address.
>
> I'm not sure there's a special type for that, so u64 might be as good as anything.
>
Sure, will use u64.
> > };
> >
> > static const struct dw_edma_pcie_data snps_edda_data = { @@ -90,6
> > +96,89 @@ struct dw_edma_pcie_data {
> > .rd_ch_cnt = 2,
> > };
> >
> > +static const struct dw_edma_pcie_data amd_mdb_data = {
> > + /* MDB registers location */
> > + .rg.bar = BAR_0,
> > + .rg.off = 0x00001000, /* 4 Kbytes */
> > + .rg.sz = 0x00002000, /* 8 Kbytes */
> > + /* MDB memory linked list location */
> > + .ll_wr = {
> > + /* Channel 0 - BAR 2, offset 0 Mbytes, size 2 Kbytes */
> > + DW_BLOCK(BAR_2, 0x00000000, 0x00000800)
> > + /* Channel 1 - BAR 2, offset 2 Mbytes, size 2 Kbytes */
> > + DW_BLOCK(BAR_2, 0x00200000, 0x00000800)
> > + },
> > + .ll_rd = {
> > + /* Channel 0 - BAR 2, offset 4 Mbytes, size 2 Kbytes */
> > + DW_BLOCK(BAR_2, 0x00400000, 0x00000800)
> > + /* Channel 1 - BAR 2, offset 6 Mbytes, size 2 Kbytes */
> > + DW_BLOCK(BAR_2, 0x00600000, 0x00000800)
> > + },
> > + /* MDB memory data location */
> > + .dt_wr = {
> > + /* Channel 0 - BAR 2, offset 8 Mbytes, size 2 Kbytes */
> > + DW_BLOCK(BAR_2, 0x00800000, 0x00000800)
> > + /* Channel 1 - BAR 2, offset 9 Mbytes, size 2 Kbytes */
> > + DW_BLOCK(BAR_2, 0x00900000, 0x00000800)
> > + },
> > + .dt_rd = {
> > + /* Channel 0 - BAR 2, offset 10 Mbytes, size 2 Kbytes */
> > + DW_BLOCK(BAR_2, 0x00a00000, 0x00000800)
> > + /* Channel 1 - BAR 2, offset 11 Mbytes, size 2 Kbytes */
> > + DW_BLOCK(BAR_2, 0x00b00000, 0x00000800)
> > + },
> > + /* Other */
> > + .mf = EDMA_MF_HDMA_NATIVE,
> > + .irqs = 1,
> > + .wr_ch_cnt = 2,
> > + .rd_ch_cnt = 2,
> > +};
> > +
> > +static void dw_edma_assign_chan_data(struct dw_edma_pcie_data *pdata,
> > + enum pci_barno bar) {
> > + u16 i;
> > + off_t off = 0, offsz = 0x200000;
> > + size_t size = 0x800;
> > + u16 wr_ch = pdata->wr_ch_cnt;
> > + u16 rd_ch = pdata->rd_ch_cnt;
> > +
> > + if (wr_ch <= 2 || rd_ch <= 2)
> > + return;
> > +
> > + /* Write channel LL region */
> > + for (i = 0; i < wr_ch; i++) {
> > + pdata->ll_wr[i].bar = bar;
> > + pdata->ll_wr[i].off = off;
> > + pdata->ll_wr[i].sz = size;
> > + off += offsz + size;
> > + }
> > +
> > + /* Read channel LL region */
> > + for (i = 0; i < rd_ch; i++) {
> > + pdata->ll_rd[i].bar = bar;
> > + pdata->ll_rd[i].off = off;
> > + pdata->ll_rd[i].sz = size;
> > + off += offsz + size;
> > + }
> > +
> > + /* Write channel data region */
> > + for (i = 0; i < wr_ch; i++) {
> > + pdata->dt_wr[i].bar = bar;
> > + pdata->dt_wr[i].off = off;
> > + pdata->dt_wr[i].sz = size;
> > + off += offsz + size;
> > + }
> > +
> > + /* Read channel data region */
> > + for (i = 0; i < rd_ch; i++) {
> > + pdata->dt_rd[i].bar = bar;
> > + pdata->dt_rd[i].off = off;
> > + pdata->dt_rd[i].sz = size;
> > + off += offsz + size;
> > + }
> > +}
> > +
> > static int dw_edma_pcie_irq_vector(struct device *dev, unsigned int
> > nr) {
> > return pci_irq_vector(to_pci_dev(dev), nr); @@ -121,7 +210,11 @@
> > static void dw_edma_pcie_get_vsec_dma_data(struct pci_dev *pdev,
> > u16 vsec;
> > u64 off;
> >
> > - vsec = pci_find_vsec_capability(pdev, PCI_VENDOR_ID_SYNOPSYS,
> > + /*
> > + * Synopsys and AMD (Xilinx) use the same VSEC ID for the purpose
> > + * of map, channel counts, etc.
> > + */
> > + vsec = pci_find_vsec_capability(pdev, pdev->vendor,
> > DW_PCIE_VSEC_DMA_ID);
>
> You must validate pdev->vendor first. Passing pdev->vendor means this will find
> any VSEC with ID 0x6 on any device at all. For example, you could find a VSEC
> with ID 0x6 on an Intel device where VSEC ID 0x6 means something completely
> different.
>
> You have to know what the Vendor ID is before calling
> pci_find_vsec_capability() because otherwise you don't know what the VSEC ID
> means.
>
> The best way to do this would be to use a separate #define for each vendor to
> remove the assumption that each vendor uses the same ID:
>
> #define DW_PCIE_SYNOPSYS_VSEC_DMA_ID 0x6
> #define DW_PCIE_XILINX_VSEC_DMA_ID 0x6
>
> vsec = pci_find_vsec_capability(pdev, PCI_VENDOR_ID_SYNOPSYS,
> DW_PCIE_SYNOPSYS_VSEC_DMA_ID);
> if (!vsec) {
> vsec = pci_find_vsec_capability(pdev, PCI_VENDOR_ID_XILINX,
> DW_PCIE_XILINX_VSEC_DMA_ID);
> if (!vsec)
> return;
> }
>
> That way it's clear that this only applies to Synopsys devices and Xilinx devices,
> Synopsys and Xilinx implemented a VSEC with the same semantics, and it's just a
> coincidence that they assigned the same VSEC ID.
>
Thank you for the suggestions. I will make the following modifications:
- Add a vendor check for VSEC ID 0x6.
- Separate the VSEC IDs for AMD (Xilinx) devices.
- Keep the code related to Synopsys unchanged for backward compatibility.
This way anyone using a Synopsys device does not need to make any changes
that might otherwise have been required by the above modifications.
> > if (!vsec)
> > return;
> > @@ -155,6 +248,24 @@ static void dw_edma_pcie_get_vsec_dma_data(struct
> pci_dev *pdev,
> > off <<= 32;
> > off |= val;
> > pdata->rg.off = off;
> > +
> > + /* AMD specific VSEC capability */
> > + vsec = pci_find_vsec_capability(pdev, pdev->vendor,
> > + DW_PCIE_XILINX_MDB_VSEC_ID);
>
> Same here. This will find a VSEC with ID 0x20 on any device from any vendor at all.
> But you only know what 0x20 means on Xilinx devices, so this should be:
>
> vsec = pci_find_vsec_capability(pdev, PCI_VENDOR_ID_XILINX,
> DW_PCIE_XILINX_MDB_VSEC_ID);
>
Suggestion taken. Thank you!
> > + if (!vsec)
> > + return;
> > +
> > + pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER, &val);
> > + if (PCI_VNDR_HEADER_ID(val) != 0x20 ||
> > + PCI_VNDR_HEADER_REV(val) != 0x1)
> > + return;
> > +
> > + pci_read_config_dword(pdev, vsec + 0xc, &val);
> > + off = val;
> > + pci_read_config_dword(pdev, vsec + 0x8, &val);
> > + off <<= 32;
> > + off |= val;
> > + pdata->devmem_phys_off = off;
> > }
> >
> > static int dw_edma_pcie_probe(struct pci_dev *pdev, @@ -179,6 +290,7
> > @@ static int dw_edma_pcie_probe(struct pci_dev *pdev,
> > }
> >
> > memcpy(vsec_data, pdata, sizeof(struct dw_edma_pcie_data));
> > + vsec_data->devmem_phys_off = DW_PCIE_AMD_MDB_INVALID_ADDR;
> >
> > /*
> > * Tries to find if exists a PCIe Vendor-Specific Extended
> > Capability @@ -186,6 +298,22 @@ static int dw_edma_pcie_probe(struct pci_dev
> *pdev,
> > */
> > dw_edma_pcie_get_vsec_dma_data(pdev, vsec_data);
> >
> > + if (pdev->vendor == PCI_VENDOR_ID_XILINX) {
> > + /*
> > + * There is no valid address found for the LL memory
> > + * space on the device side.
> > + */
> > + if (vsec_data->devmem_phys_off ==
> DW_PCIE_AMD_MDB_INVALID_ADDR)
> > + return -EINVAL;
> > +
> > + /*
> > + * Configure the channel LL and data blocks if number of
> > + * channels enabled in VSEC capability are more than the
> > + * channels configured in amd_mdb_data.
> > + */
> > + dw_edma_assign_chan_data(vsec_data, BAR_2);
> > + }
> > +
> > /* Mapping PCI BAR regions */
> > mask = BIT(vsec_data->rg.bar);
> > for (i = 0; i < vsec_data->wr_ch_cnt; i++) { @@ -367,6 +495,8 @@
> > static void dw_edma_pcie_remove(struct pci_dev *pdev)
> >
> > static const struct pci_device_id dw_edma_pcie_id_table[] = {
> > { PCI_DEVICE_DATA(SYNOPSYS, EDDA, &snps_edda_data) },
> > + { PCI_VDEVICE(XILINX, PCI_DEVICE_ID_AMD_MDB_B054),
> > + (kernel_ulong_t)&amd_mdb_data },
> > { }
> > };
> > MODULE_DEVICE_TABLE(pci, dw_edma_pcie_id_table);
> > --
> > 1.8.3.1
> >