[PATCH v3 1/3] dmaengine: dw-edma-pcie: Discover endpoint DMA metadata

From: Koichiro Den

Date: Sat Jun 20 2026 - 13:10:12 EST


Teach dw-edma-pcie to discover a PCI endpoint DMA function from
BAR-resident metadata. The metadata supplies the DMA register window,
channel counts, descriptor windows, optional auxiliary windows, and
endpoint-local descriptor and auxiliary addresses. Accept DesignWare
eDMA unroll, HDMA compatible, and HDMA native linked-list layouts.

Endpoint-provided DMA channels use raw slave addresses because the host
programs transfers against endpoint physical addresses, not PCI BAR
addresses. The host-side dw-edma-pcie instance is remote-routed by
default, so delegated channels report completions through IMWr/MSI.

Endpoint DMA metadata currently has no static PCI ID. Let an explicit
driver_override bind use the generic endpoint DMA metadata parser, but
do not treat arbitrary dynamic IDs without driver data as endpoint DMA
devices.

The endpoint polls HOST_REQ at a low idle rate before programming DMA
window submaps and setting READY. Let the host wait for several endpoint
poll periods before treating the READY handshake as timed out.

Signed-off-by: Koichiro Den <den@xxxxxxxxxxxxx>
---
Changes in v3:
- Select endpoint DMA match data before copying DMA data (Sashiko).
- Require driver_override for the generic endpoint DMA fallback
(Sashiko).
- Accept HDMA native linked-list endpoint DMA metadata.

drivers/dma/dw-edma/dw-edma-pcie.c | 380 ++++++++++++++++++++++++++++-
1 file changed, 378 insertions(+), 2 deletions(-)

diff --git a/drivers/dma/dw-edma/dw-edma-pcie.c b/drivers/dma/dw-edma/dw-edma-pcie.c
index 1e75fefae9b8..2a56ee19d4cb 100644
--- a/drivers/dma/dw-edma/dw-edma-pcie.c
+++ b/drivers/dma/dw-edma/dw-edma-pcie.c
@@ -11,9 +11,13 @@
#include <linux/pci.h>
#include <linux/device.h>
#include <linux/dma/edma.h>
+#include <linux/iopoll.h>
#include <linux/pci-epf.h>
#include <linux/msi.h>
#include <linux/bitfield.h>
+#include <linux/io.h>
+#include <linux/overflow.h>
+#include <linux/pci-ep-dma.h>
#include <linux/sizes.h>

#include "dw-edma-core.h"
@@ -45,6 +49,9 @@
#define DW_PCIE_XILINX_MDB_DT_OFF_GAP 0x100000
#define DW_PCIE_XILINX_MDB_DT_SIZE 0x800

+#define DW_PCIE_EP_DMA_READY_POLL_US 1000
+#define DW_PCIE_EP_DMA_READY_TIMEOUT_US 2000000
+
#define DW_BLOCK(a, b, c) \
{ \
.bar = a, \
@@ -94,6 +101,12 @@ struct dw_edma_pcie_match_data {
#define DW_EDMA_PCIE_F_DEVMEM_PHYS_OFF BIT(0)
#define DW_EDMA_PCIE_F_REG_OFFSET BIT(1)

+/*
+ * View of one BAR that is mapped while it is scanned for endpoint DMA
+ * metadata. All metadata accessors take offsets relative to @base.
+ */
+struct dw_edma_pcie_ep_dma_view {
+ struct pci_dev *pdev; /* device whose BAR is mapped */
+ void __iomem *base; /* mapping of the BAR, starting at BAR offset 0 */
+ resource_size_t limit; /* BAR length; bounds the advertised metadata length */
+};
+
static const struct dw_edma_pcie_data snps_edda_data = {
/* eDMA registers location */
.rg.bar = BAR_0,
@@ -158,6 +171,13 @@ static const struct dw_edma_pcie_data xilinx_cpm6_dma_data = {
.rd_ch_cnt = 8,
};

+/*
+ * Template DMA data for a function discovered through endpoint metadata.
+ * The channel counts start at the driver maximum; the metadata parser clamps
+ * them to the advertised counts and replaces .mf and .irqs with the values
+ * derived from the metadata.
+ */
+static const struct dw_edma_pcie_data ep_dma_data = {
+ .mf = EDMA_MF_EDMA_UNROLL,
+ .irqs = EDMA_MAX_WR_CH + EDMA_MAX_RD_CH,
+ .wr_ch_cnt = EDMA_MAX_WR_CH,
+ .rd_ch_cnt = EDMA_MAX_RD_CH,
+};
+
static void dw_edma_set_chan_region_offset(struct dw_edma_pcie_data *pdata,
enum pci_barno bar, off_t start_off,
off_t ll_off_gap, size_t ll_size,
@@ -227,6 +247,86 @@ static const struct dw_edma_plat_ops dw_edma_pcie_plat_ops = {
.pci_address = dw_edma_pcie_address,
};

+/*
+ * Platform ops for endpoint-provided DMA channels. Unlike
+ * dw_edma_pcie_plat_ops, no .pci_address hook is installed here; only the
+ * IRQ vector lookup is provided.
+ */
+static const struct dw_edma_plat_ops dw_edma_pcie_raw_addr_plat_ops = {
+ .irq_vector = dw_edma_pcie_irq_vector,
+};
+
+/* Return true when @bar lies in the BAR_0..BAR_5 range. */
+static bool dw_edma_pcie_valid_bar(enum pci_barno bar)
+{
+	if (bar < BAR_0)
+		return false;
+
+	return bar <= BAR_5;
+}
+
+/*
+ * Check that the @sz bytes starting at offset @off fit inside @bar of @pdev.
+ * A BAR outside BAR_0..BAR_5 or a zero @sz is rejected.
+ */
+static bool dw_edma_pcie_valid_bar_range(struct pci_dev *pdev,
+					 enum pci_barno bar, u64 off,
+					 size_t sz)
+{
+	resource_size_t len;
+
+	if (!dw_edma_pcie_valid_bar(bar))
+		return false;
+	if (!sz)
+		return false;
+
+	len = pci_resource_len(pdev, bar);
+	if (off > len)
+		return false;
+
+	/* off <= len was established above, so the subtraction cannot wrap. */
+	return sz <= len - off;
+}
+
+/* Range-check the BAR, offset and size of @block against the BARs of @pdev. */
+static bool dw_edma_pcie_valid_block(struct pci_dev *pdev,
+				     const struct dw_edma_block *block)
+{
+	bool in_range;
+
+	in_range = dw_edma_pcie_valid_bar_range(pdev, block->bar, block->off,
+						block->sz);
+
+	return in_range;
+}
+
+/*
+ * Decide whether @bar can be scanned for endpoint DMA metadata: it must be a
+ * memory resource that is neither unset nor disabled, and it must be at least
+ * PCI_EP_DMA_METADATA_HDR_LEN bytes long.
+ */
+static bool dw_edma_pcie_ep_dma_bar_scannable(struct pci_dev *pdev,
+					      enum pci_barno bar)
+{
+	const unsigned long unusable = IORESOURCE_UNSET | IORESOURCE_DISABLED;
+	unsigned long res_flags = pci_resource_flags(pdev, bar);
+
+	if (!(res_flags & IORESOURCE_MEM) || (res_flags & unusable))
+		return false;
+
+	return pci_resource_len(pdev, bar) >= PCI_EP_DMA_METADATA_HDR_LEN;
+}
+
+/* Read the 32-bit metadata word located @off bytes into the mapped view. */
+static u32 dw_edma_pcie_ep_dma_readl(struct dw_edma_pcie_ep_dma_view *view,
+				     u16 off)
+{
+	void __iomem *addr = view->base + off;
+
+	return readl(addr);
+}
+
+/* Write @val to the 32-bit metadata word located @off bytes into the view. */
+static void dw_edma_pcie_ep_dma_writel(struct dw_edma_pcie_ep_dma_view *view,
+				       u16 off, u32 val)
+{
+	void __iomem *addr = view->base + off;
+
+	writel(val, addr);
+}
+
+/*
+ * Assemble a 64-bit value from two 32-bit metadata words. The word at @hi is
+ * read first and supplies bits 63:32; the word at @lo supplies bits 31:0.
+ */
+static u64 dw_edma_pcie_ep_dma_read64(struct dw_edma_pcie_ep_dma_view *view,
+				      u16 lo, u16 hi)
+{
+	u32 upper = dw_edma_pcie_ep_dma_readl(view, hi);
+	u32 lower = dw_edma_pcie_ep_dma_readl(view, lo);
+
+	return ((u64)upper << 32) | lower;
+}
+
+/*
+ * Read a 64-bit offset from the @lo/@hi metadata words into @off.
+ *
+ * Returns 0 on success, or -EINVAL (leaving @off untouched) when the value
+ * is larger than the type of @off can represent.
+ */
+static int dw_edma_pcie_ep_dma_read_off(struct dw_edma_pcie_ep_dma_view *view,
+					u16 lo, u16 hi, off_t *off)
+{
+	u64 raw = dw_edma_pcie_ep_dma_read64(view, lo, hi);
+
+	if (raw <= type_max(*off)) {
+		*off = raw;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
static void dw_edma_pcie_get_synopsys_dma_data(struct pci_dev *pdev,
struct dw_edma_pcie_data *pdata)
{
@@ -328,6 +428,265 @@ static void dw_edma_pcie_get_xilinx_dma_data(struct pci_dev *pdev,
pdata->devmem_phys_off = off;
}

+/*
+ * Parse @ch_cnt per-channel metadata entries, starting at @table_off and
+ * spaced @entry_size bytes apart, into the descriptor (ll_*) and data (dt_*)
+ * block arrays of @pdata. @write selects the write-channel arrays, otherwise
+ * the read-channel arrays are filled.
+ *
+ * Returns 0 on success, -EOPNOTSUPP if an entry's HW_CH field does not equal
+ * its table index, or -EINVAL if an offset does not fit in off_t or a block
+ * does not fit inside its BAR.
+ */
+static int
+dw_edma_pcie_parse_ep_dma_ch_table(struct dw_edma_pcie_ep_dma_view *view,
+ struct dw_edma_pcie_data *pdata,
+ u16 table_off, u16 entry_size, u16 ch_cnt,
+ bool write)
+{
+ struct dw_edma_block *desc_blocks = write ? pdata->ll_wr : pdata->ll_rd;
+ struct dw_edma_block *data_blocks = write ? pdata->dt_wr : pdata->dt_rd;
+ u32 ctrl;
+ u16 i;
+ int ret;
+
+ for (i = 0; i < ch_cnt; i++) {
+ struct dw_edma_block *desc_block = &desc_blocks[i];
+ struct dw_edma_block *data_block = &data_blocks[i];
+ u16 off = table_off + i * entry_size;
+ u16 field, lo, hi;
+
+ /* Entry i must describe hardware channel i; other mappings are refused. */
+ field = off + PCI_EP_DMA_METADATA_CH_CTRL;
+ ctrl = dw_edma_pcie_ep_dma_readl(view, field);
+ if (FIELD_GET(PCI_EP_DMA_METADATA_CH_CTRL_HW_CH, ctrl) != i)
+ return -EOPNOTSUPP;
+
+ /* Descriptor window: BAR, BAR offset, size and advertised address. */
+ desc_block->bar =
+ FIELD_GET(PCI_EP_DMA_METADATA_CH_CTRL_DESC_BAR, ctrl);
+ lo = off + PCI_EP_DMA_METADATA_CH_DESC_OFF_LO;
+ hi = off + PCI_EP_DMA_METADATA_CH_DESC_OFF_HI;
+ ret = dw_edma_pcie_ep_dma_read_off(view, lo, hi,
+ &desc_block->off);
+ if (ret)
+ return ret;
+ field = off + PCI_EP_DMA_METADATA_CH_DESC_SIZE;
+ desc_block->sz = dw_edma_pcie_ep_dma_readl(view, field);
+ lo = off + PCI_EP_DMA_METADATA_CH_DESC_ADDR_LO;
+ hi = off + PCI_EP_DMA_METADATA_CH_DESC_ADDR_HI;
+ desc_block->paddr =
+ dw_edma_pcie_ep_dma_read64(view, lo, hi);
+ desc_block->paddr_valid = true;
+ if (!dw_edma_pcie_valid_block(view->pdev, desc_block))
+ return -EINVAL;
+
+ /*
+ * The auxiliary window is optional: without AUX_VALID the data
+ * block is left zeroed with its BAR set to NO_BAR.
+ */
+ *data_block = (struct dw_edma_block) { .bar = NO_BAR };
+ if (!(ctrl & PCI_EP_DMA_METADATA_CH_CTRL_AUX_VALID))
+ continue;
+
+ /* Auxiliary window: same field layout as the descriptor window. */
+ data_block->bar =
+ FIELD_GET(PCI_EP_DMA_METADATA_CH_CTRL_AUX_BAR, ctrl);
+ lo = off + PCI_EP_DMA_METADATA_CH_AUX_OFF_LO;
+ hi = off + PCI_EP_DMA_METADATA_CH_AUX_OFF_HI;
+ ret = dw_edma_pcie_ep_dma_read_off(view, lo, hi,
+ &data_block->off);
+ if (ret)
+ return ret;
+ field = off + PCI_EP_DMA_METADATA_CH_AUX_SIZE;
+ data_block->sz = dw_edma_pcie_ep_dma_readl(view, field);
+ lo = off + PCI_EP_DMA_METADATA_CH_AUX_ADDR_LO;
+ hi = off + PCI_EP_DMA_METADATA_CH_AUX_ADDR_HI;
+ data_block->paddr =
+ dw_edma_pcie_ep_dma_read64(view, lo, hi);
+ data_block->paddr_valid = true;
+ if (!dw_edma_pcie_valid_block(view->pdev, data_block))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * Poll the metadata CTRL word until PCI_EP_DMA_METADATA_CTRL_READY is set.
+ * CTRL is sampled every DW_PCIE_EP_DMA_READY_POLL_US microseconds for at most
+ * DW_PCIE_EP_DMA_READY_TIMEOUT_US microseconds. The result of
+ * read_poll_timeout() is returned unchanged.
+ */
+static int
+dw_edma_pcie_ep_dma_wait_ready(struct dw_edma_pcie_ep_dma_view *view)
+{
+	u32 ctrl;
+	int ret;
+
+	ret = read_poll_timeout(dw_edma_pcie_ep_dma_readl, ctrl,
+				ctrl & PCI_EP_DMA_METADATA_CTRL_READY,
+				DW_PCIE_EP_DMA_READY_POLL_US,
+				DW_PCIE_EP_DMA_READY_TIMEOUT_US, false,
+				view, PCI_EP_DMA_METADATA_CTRL);
+
+	return ret;
+}
+
+/*
+ * Validate the endpoint DMA metadata header visible through @view.
+ *
+ * Checks, in order: magic, revision, advertised length (at least
+ * PCI_EP_DMA_METADATA_HDR_LEN and no larger than @view->limit), register
+ * layout, register BAR, channel counts, channel entry size, and that the
+ * channel tables fit inside the advertised length.
+ *
+ * On success the raw CTRL word and the register layout data are stored
+ * through @metadata_ctrl and @reg_layout_data when those pointers are
+ * non-NULL.
+ *
+ * Returns 0 on success, -ENODEV if the magic does not match, -EOPNOTSUPP for
+ * a layout ID other than DW eDMA or for the legacy eDMA register layout, and
+ * -EINVAL for any other inconsistency.
+ */
+static int
+dw_edma_pcie_validate_ep_dma_metadata(struct dw_edma_pcie_ep_dma_view *view,
+ u32 *metadata_ctrl, u8 *reg_layout_data)
+{
+ size_t table_size, table_end;
+ enum pci_barno reg_bar;
+ u16 len, entry_size;
+ u16 wr_ch_cnt, rd_ch_cnt;
+ u8 layout, layout_data;
+ u32 val;
+
+ /* A magic mismatch is the only condition reported as -ENODEV. */
+ val = dw_edma_pcie_ep_dma_readl(view, 0);
+ if (val != PCI_EP_DMA_METADATA_MAGIC)
+ return -ENODEV;
+
+ val = dw_edma_pcie_ep_dma_readl(view, PCI_EP_DMA_METADATA_HDR);
+ if (FIELD_GET(PCI_EP_DMA_METADATA_HDR_REV, val) !=
+ PCI_EP_DMA_METADATA_REV)
+ return -EINVAL;
+
+ /* The advertised length must cover the header and stay inside the view. */
+ len = FIELD_GET(PCI_EP_DMA_METADATA_HDR_LEN_FIELD, val);
+ if (len < PCI_EP_DMA_METADATA_HDR_LEN)
+ return -EINVAL;
+ if (len > view->limit)
+ return -EINVAL;
+
+ val = dw_edma_pcie_ep_dma_readl(view, PCI_EP_DMA_METADATA_REG_LAYOUT);
+ layout = FIELD_GET(PCI_EP_DMA_METADATA_REG_LAYOUT_ID, val);
+ if (layout != PCI_EP_DMA_METADATA_REG_LAYOUT_DW_EDMA)
+ return -EOPNOTSUPP;
+
+ /* Legacy eDMA is known but unsupported; unknown values are invalid. */
+ layout_data = FIELD_GET(PCI_EP_DMA_METADATA_REG_LAYOUT_DATA, val);
+ if (layout_data == EDMA_MF_EDMA_LEGACY)
+ return -EOPNOTSUPP;
+ if (layout_data != EDMA_MF_EDMA_UNROLL &&
+ layout_data != EDMA_MF_HDMA_COMPAT &&
+ layout_data != EDMA_MF_HDMA_NATIVE)
+ return -EINVAL;
+
+ val = dw_edma_pcie_ep_dma_readl(view, PCI_EP_DMA_METADATA_CTRL);
+ reg_bar = FIELD_GET(PCI_EP_DMA_METADATA_CTRL_REG_BAR, val);
+ if (!dw_edma_pcie_valid_bar(reg_bar))
+ return -EINVAL;
+
+ /* At least one channel is required, and neither count may exceed the max. */
+ wr_ch_cnt = FIELD_GET(PCI_EP_DMA_METADATA_CTRL_WR_CH_COUNT, val);
+ rd_ch_cnt = FIELD_GET(PCI_EP_DMA_METADATA_CTRL_RD_CH_COUNT, val);
+ if (!wr_ch_cnt && !rd_ch_cnt)
+ return -EINVAL;
+ if (wr_ch_cnt > EDMA_MAX_WR_CH || rd_ch_cnt > EDMA_MAX_RD_CH)
+ return -EINVAL;
+
+ /* Entries must hold at least one full entry and be a multiple of 4 bytes. */
+ entry_size = FIELD_GET(PCI_EP_DMA_METADATA_CTRL_CH_ENTRY_SIZE, val);
+ if (entry_size < PCI_EP_DMA_METADATA_CH_ENTRY_SIZE ||
+ entry_size % sizeof(u32))
+ return -EINVAL;
+
+ /* Header plus all write and read entries must fit in the advertised length. */
+ if (check_mul_overflow((size_t)(wr_ch_cnt + rd_ch_cnt),
+ (size_t)entry_size, &table_size) ||
+ check_add_overflow((size_t)PCI_EP_DMA_METADATA_HDR_LEN,
+ table_size, &table_end) ||
+ table_end > len)
+ return -EINVAL;
+
+ if (metadata_ctrl)
+ *metadata_ctrl = val;
+ if (reg_layout_data)
+ *reg_layout_data = layout_data;
+
+ return 0;
+}
+
+/*
+ * Validate the metadata behind @view and fill @pdata from it: register
+ * layout, register window (BAR, offset, size), channel counts, IRQ count and
+ * the per-channel descriptor and auxiliary blocks.
+ *
+ * The channel counts already present in @pdata act as upper bounds; the
+ * advertised counts can only lower them.
+ *
+ * Returns 0 on success, the error from metadata validation or channel table
+ * parsing, or -EINVAL if the register window does not fit in its BAR.
+ */
+static int
+dw_edma_pcie_parse_ep_dma_data(struct dw_edma_pcie_ep_dma_view *view,
+ struct dw_edma_pcie_data *pdata)
+{
+ u32 ctrl, reg_sz;
+ u8 reg_layout_data;
+ u64 reg_off;
+ u16 wr_table, rd_table, entry_size;
+ u16 wr_ch_cnt, rd_ch_cnt;
+ int ret;
+
+ ret = dw_edma_pcie_validate_ep_dma_metadata(view, &ctrl,
+ &reg_layout_data);
+ if (ret)
+ return ret;
+
+ pci_dbg(view->pdev, "Detected PCI endpoint DMA BAR metadata\n");
+
+ pdata->mf = reg_layout_data;
+ pdata->rg.bar = FIELD_GET(PCI_EP_DMA_METADATA_CTRL_REG_BAR, ctrl);
+
+ /* Clamp to what @pdata allows; one IRQ is accounted per remaining channel. */
+ wr_ch_cnt = FIELD_GET(PCI_EP_DMA_METADATA_CTRL_WR_CH_COUNT, ctrl);
+ rd_ch_cnt = FIELD_GET(PCI_EP_DMA_METADATA_CTRL_RD_CH_COUNT, ctrl);
+ pdata->wr_ch_cnt = min_t(u16, pdata->wr_ch_cnt, wr_ch_cnt);
+ pdata->rd_ch_cnt = min_t(u16, pdata->rd_ch_cnt, rd_ch_cnt);
+ pdata->irqs = pdata->wr_ch_cnt + pdata->rd_ch_cnt;
+ reg_off = dw_edma_pcie_ep_dma_read64(view,
+ PCI_EP_DMA_METADATA_REG_OFF_LO,
+ PCI_EP_DMA_METADATA_REG_OFF_HI);
+ reg_sz = dw_edma_pcie_ep_dma_readl(view, PCI_EP_DMA_METADATA_REG_SIZE);
+ /* The offset must fit the rg.off type and the window must fit the BAR. */
+ if (reg_off > type_max(pdata->rg.off) ||
+ !dw_edma_pcie_valid_bar_range(view->pdev, pdata->rg.bar,
+ reg_off, reg_sz))
+ return -EINVAL;
+ pdata->rg.off = reg_off;
+ pdata->rg.sz = reg_sz;
+
+ /*
+ * The read table starts after all advertised write entries, so its
+ * offset uses the unclamped wr_ch_cnt rather than pdata->wr_ch_cnt.
+ */
+ entry_size = FIELD_GET(PCI_EP_DMA_METADATA_CTRL_CH_ENTRY_SIZE, ctrl);
+ wr_table = PCI_EP_DMA_METADATA_HDR_LEN;
+ rd_table = PCI_EP_DMA_METADATA_HDR_LEN + wr_ch_cnt * entry_size;
+
+ ret = dw_edma_pcie_parse_ep_dma_ch_table(view, pdata, wr_table,
+ entry_size, pdata->wr_ch_cnt,
+ true);
+ if (ret)
+ return ret;
+
+ return dw_edma_pcie_parse_ep_dma_ch_table(view, pdata, rd_table,
+ entry_size,
+ pdata->rd_ch_cnt, false);
+}
+
+/*
+ * Scan the BARs of @pdev for endpoint DMA metadata and, on the first BAR
+ * whose header carries the metadata magic, run the HOST_REQ/READY handshake
+ * and parse the metadata into @pdata.
+ *
+ * BARs that are not scannable, cannot be mapped, or do not carry the magic
+ * are skipped. Any other outcome on a BAR ends the scan: the BAR is unmapped
+ * and the result (0 or a negative errno) is returned.
+ *
+ * Returns -ENODEV if no BAR carries endpoint DMA metadata.
+ */
+static int
+dw_edma_pcie_parse_ep_dma_caps(struct pci_dev *pdev,
+			       struct dw_edma_pcie_data *pdata)
+{
+	struct dw_edma_pcie_ep_dma_view view;
+	void __iomem *mapping;
+	enum pci_barno bar;
+	u32 ctrl;
+	int ret;
+
+	for (bar = BAR_0; bar < PCI_STD_NUM_BARS; bar++) {
+		if (!dw_edma_pcie_ep_dma_bar_scannable(pdev, bar))
+			continue;
+
+		mapping = pci_iomap_range(pdev, bar, 0, 0);
+		if (!mapping)
+			continue;
+
+		view.pdev = pdev;
+		view.base = mapping;
+		view.limit = pci_resource_len(pdev, bar);
+
+		ret = dw_edma_pcie_validate_ep_dma_metadata(&view, NULL, NULL);
+		if (ret == -ENODEV) {
+			/* No metadata magic in this BAR; try the next one. */
+			pci_iounmap(pdev, mapping);
+			continue;
+		}
+
+		if (!ret) {
+			/* Raise HOST_REQ, then wait for the endpoint's READY. */
+			ctrl = dw_edma_pcie_ep_dma_readl(&view,
+							 PCI_EP_DMA_METADATA_CTRL);
+			ctrl |= PCI_EP_DMA_METADATA_CTRL_HOST_REQ;
+			dw_edma_pcie_ep_dma_writel(&view,
+						   PCI_EP_DMA_METADATA_CTRL,
+						   ctrl);
+
+			ret = dw_edma_pcie_ep_dma_wait_ready(&view);
+		}
+
+		if (!ret)
+			ret = dw_edma_pcie_parse_ep_dma_data(&view, pdata);
+
+		pci_iounmap(pdev, mapping);
+
+		return ret;
+	}
+
+	return -ENODEV;
+}
+
static int
dw_edma_pcie_parse_synopsys_caps(struct pci_dev *pdev,
struct dw_edma_pcie_data *pdata)
@@ -367,6 +726,14 @@ dw_edma_pcie_parse_xilinx_caps(struct pci_dev *pdev,
return 0;
}

+/*
+ * Match data for a DMA function described by endpoint BAR metadata. It pairs
+ * the ep_dma_data template with the metadata parser and with platform ops
+ * that install no .pci_address hook.
+ */
+static const struct dw_edma_pcie_match_data ep_dma_match_data = {
+ .data = &ep_dma_data,
+ .plat_ops = &dw_edma_pcie_raw_addr_plat_ops,
+ .parse_caps = dw_edma_pcie_parse_ep_dma_caps,
+ .flags = DW_EDMA_PCIE_F_REG_OFFSET,
+ .chip_flags = DW_EDMA_CHIP_PARTIAL,
+};
+
static u64 dw_edma_get_phys_addr(struct pci_dev *pdev,
const struct dw_edma_pcie_match_data *match,
struct dw_edma_pcie_data *pdata,
@@ -400,8 +767,17 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev,
int err, nr_irqs;
int i, mask;

- if (!match)
- return -ENODEV;
+ if (!match) {
+ /*
+ * The endpoint DMA metadata path has no static PCI ID yet.
+ * Accept it only for an explicit driver_override bind, not for
+ * arbitrary dynamic IDs without driver data.
+ */
+ if (!device_has_driver_override(&pdev->dev))
+ return -ENODEV;
+
+ match = &ep_dma_match_data;
+ }
pdata = match->data;

if (!pdata)
--
2.51.0