[PATCH v2 1/3] dmaengine: dw-edma-pcie: Discover endpoint DMA metadata
From: Koichiro Den
Date: Mon May 25 2026 - 02:35:38 EST
Teach dw-edma-pcie to discover a PCI endpoint DMA function from
BAR-resident metadata. The metadata supplies the DMA register window,
channel counts, descriptor windows, optional auxiliary windows, and
endpoint-local descriptor and auxiliary addresses.
Endpoint-provided DMA channels use raw slave addresses because the host
programs transfers against endpoint physical addresses, not PCI BAR
addresses. Scope the default remote interrupt mode to the endpoint DMA
metadata match entry so EDDA and MDB keep their existing local interrupt
behavior.
Endpoint DMA metadata can be discovered after an explicit bind through
driver_override or a dynamic ID. For such binds, there is no static
match data, so the driver falls back to the generic endpoint DMA
metadata parser.
The endpoint polls HOST_REQ at a low idle rate before programming DMA
window submaps and setting READY. Let the host wait for several endpoint
poll periods before treating the READY handshake as timed out.
Signed-off-by: Koichiro Den <den@xxxxxxxxxxxxx>
---
Changes in v2:
- Add raw-address platform ops and select them from the endpoint DMA
match entry.
drivers/dma/dw-edma/dw-edma-pcie.c | 374 ++++++++++++++++++++++++++++-
1 file changed, 373 insertions(+), 1 deletion(-)
diff --git a/drivers/dma/dw-edma/dw-edma-pcie.c b/drivers/dma/dw-edma/dw-edma-pcie.c
index 12229a9301cd..7e173ad01220 100644
--- a/drivers/dma/dw-edma/dw-edma-pcie.c
+++ b/drivers/dma/dw-edma/dw-edma-pcie.c
@@ -11,9 +11,13 @@
#include <linux/pci.h>
#include <linux/device.h>
#include <linux/dma/edma.h>
+#include <linux/iopoll.h>
#include <linux/pci-epf.h>
#include <linux/msi.h>
#include <linux/bitfield.h>
+#include <linux/io.h>
+#include <linux/overflow.h>
+#include <linux/pci-ep-dma.h>
#include <linux/sizes.h>
#include "dw-edma-core.h"
@@ -44,6 +48,9 @@
#define DW_PCIE_XILINX_MDB_DT_OFF_GAP 0x100000
#define DW_PCIE_XILINX_MDB_DT_SIZE 0x800
+/*
+ * Host-side wait for the endpoint READY bit: delay between two polls and
+ * overall timeout, both in microseconds.
+ */
+#define DW_PCIE_EP_DMA_READY_POLL_US 1000
+#define DW_PCIE_EP_DMA_READY_TIMEOUT_US 2000000
+
#define DW_BLOCK(a, b, c) \
{ \
.bar = a, \
@@ -94,6 +101,12 @@ struct dw_edma_pcie_match_data {
#define DW_EDMA_PCIE_F_DEVMEM_PHYS_OFF BIT(0)
#define DW_EDMA_PCIE_F_REG_OFFSET BIT(1)
+/* Mapped view of one BAR while it is examined for endpoint DMA metadata. */
+struct dw_edma_pcie_ep_dma_view {
+ struct pci_dev *pdev; /* device whose BAR is mapped */
+ void __iomem *base; /* mapping that metadata offsets are added to */
+ resource_size_t limit; /* upper bound for the advertised metadata length */
+};
+
static const struct dw_edma_pcie_data snps_edda_data = {
/* eDMA registers location */
.rg.bar = BAR_0,
@@ -145,6 +158,13 @@ static const struct dw_edma_pcie_data xilinx_mdb_data = {
.rd_ch_cnt = 8,
};
+/*
+ * Template for endpoints described by BAR metadata. The channel counts are
+ * upper bounds: dw_edma_pcie_parse_ep_dma_data() clamps them to what the
+ * metadata advertises and overwrites .mf and .irqs.
+ */
+static const struct dw_edma_pcie_data ep_dma_data = {
+ .mf = EDMA_MF_EDMA_UNROLL,
+ .irqs = EDMA_MAX_WR_CH + EDMA_MAX_RD_CH,
+ .wr_ch_cnt = EDMA_MAX_WR_CH,
+ .rd_ch_cnt = EDMA_MAX_RD_CH,
+};
+
static void dw_edma_set_chan_region_offset(struct dw_edma_pcie_data *pdata,
enum pci_barno bar, off_t start_off,
off_t ll_off_gap, size_t ll_size,
@@ -214,6 +234,86 @@ static const struct dw_edma_plat_ops dw_edma_pcie_plat_ops = {
.pci_address = dw_edma_pcie_address,
};
+/*
+ * Platform ops that provide no .pci_address hook; selected by the endpoint
+ * DMA match entry.
+ */
+static const struct dw_edma_plat_ops dw_edma_pcie_raw_addr_plat_ops = {
+ .irq_vector = dw_edma_pcie_irq_vector,
+};
+
+/* Return true when @bar lies within the BAR_0..BAR_5 range. */
+static bool dw_edma_pcie_valid_bar(enum pci_barno bar)
+{
+	if (bar < BAR_0 || bar > BAR_5)
+		return false;
+
+	return true;
+}
+
+/*
+ * Return true when the non-empty window [@off, @off + @sz) lies entirely
+ * inside BAR @bar of @pdev.
+ */
+static bool dw_edma_pcie_valid_bar_range(struct pci_dev *pdev,
+					 enum pci_barno bar, u64 off,
+					 size_t sz)
+{
+	resource_size_t total;
+
+	/* An empty window or an out-of-range BAR index is never valid. */
+	if (!sz || !dw_edma_pcie_valid_bar(bar))
+		return false;
+
+	total = pci_resource_len(pdev, bar);
+	if (off > total)
+		return false;
+
+	/* Compare against the remaining space so that off + sz cannot wrap. */
+	return sz <= total - off;
+}
+
+/* Return true when @block names a valid BAR and fits inside it. */
+static bool dw_edma_pcie_valid_block(struct pci_dev *pdev,
+				     const struct dw_edma_block *block)
+{
+	if (!dw_edma_pcie_valid_bar_range(pdev, block->bar, block->off,
+					  block->sz))
+		return false;
+
+	return true;
+}
+
+/*
+ * Return true when BAR @bar is an assigned, enabled memory resource that is
+ * at least as large as the metadata header.
+ */
+static bool dw_edma_pcie_ep_dma_bar_scannable(struct pci_dev *pdev,
+					      enum pci_barno bar)
+{
+	const unsigned long unusable = IORESOURCE_UNSET | IORESOURCE_DISABLED;
+	unsigned long res_flags = pci_resource_flags(pdev, bar);
+
+	if (!(res_flags & IORESOURCE_MEM) || (res_flags & unusable))
+		return false;
+
+	if (pci_resource_len(pdev, bar) < PCI_EP_DMA_METADATA_HDR_LEN)
+		return false;
+
+	return true;
+}
+
+/* Read the 32-bit metadata word located @off bytes into the mapped view. */
+static u32 dw_edma_pcie_ep_dma_readl(struct dw_edma_pcie_ep_dma_view *view,
+				     u16 off)
+{
+	void __iomem *reg = view->base + off;
+
+	return readl(reg);
+}
+
+/* Write @val to the 32-bit metadata word @off bytes into the mapped view. */
+static void dw_edma_pcie_ep_dma_writel(struct dw_edma_pcie_ep_dma_view *view,
+				       u16 off, u32 val)
+{
+	void __iomem *reg = view->base + off;
+
+	writel(val, reg);
+}
+
+/*
+ * Assemble a 64-bit value from two 32-bit metadata words. The high word at
+ * @hi is read first, then the low word at @lo.
+ */
+static u64 dw_edma_pcie_ep_dma_read64(struct dw_edma_pcie_ep_dma_view *view,
+				      u16 lo, u16 hi)
+{
+	u32 upper, lower;
+
+	upper = dw_edma_pcie_ep_dma_readl(view, hi);
+	lower = dw_edma_pcie_ep_dma_readl(view, lo);
+
+	return ((u64)upper << 32) | lower;
+}
+
+/*
+ * Read a 64-bit offset from the @lo/@hi metadata words into @off.
+ *
+ * Returns 0 on success, or -EINVAL (leaving @off untouched) when the value
+ * does not fit in the type of *@off.
+ */
+static int dw_edma_pcie_ep_dma_read_off(struct dw_edma_pcie_ep_dma_view *view,
+					u16 lo, u16 hi, off_t *off)
+{
+	u64 raw = dw_edma_pcie_ep_dma_read64(view, lo, hi);
+
+	if (raw <= type_max(*off)) {
+		*off = raw;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
static void dw_edma_pcie_get_synopsys_dma_data(struct pci_dev *pdev,
struct dw_edma_pcie_data *pdata)
{
@@ -315,6 +415,265 @@ static void dw_edma_pcie_get_xilinx_dma_data(struct pci_dev *pdev,
pdata->devmem_phys_off = off;
}
+/*
+ * Fill the per-channel descriptor (ll_*) and auxiliary (dt_*) blocks of
+ * @pdata for one direction from the channel table that starts at @table_off.
+ *
+ * @entry_size is the stride between table entries, @ch_cnt the number of
+ * entries to parse, and @write selects the write (true) or read (false)
+ * block arrays.
+ *
+ * Returns 0 on success, -EOPNOTSUPP when entry i does not name hardware
+ * channel i, and -EINVAL when an offset does not fit in off_t or a block
+ * does not fit inside its BAR.
+ */
+static int
+dw_edma_pcie_parse_ep_dma_ch_table(struct dw_edma_pcie_ep_dma_view *view,
+ struct dw_edma_pcie_data *pdata,
+ u16 table_off, u16 entry_size, u16 ch_cnt,
+ bool write)
+{
+ struct dw_edma_block *desc_blocks = write ? pdata->ll_wr : pdata->ll_rd;
+ struct dw_edma_block *data_blocks = write ? pdata->dt_wr : pdata->dt_rd;
+ u32 ctrl;
+ u16 i;
+ int ret;
+
+ for (i = 0; i < ch_cnt; i++) {
+ struct dw_edma_block *desc_block = &desc_blocks[i];
+ struct dw_edma_block *data_block = &data_blocks[i];
+ u16 off = table_off + i * entry_size;
+ u16 field, lo, hi;
+
+ /* Entry i must describe hardware channel i. */
+ field = off + PCI_EP_DMA_METADATA_CH_CTRL;
+ ctrl = dw_edma_pcie_ep_dma_readl(view, field);
+ if (FIELD_GET(PCI_EP_DMA_METADATA_CH_CTRL_HW_CH, ctrl) != i)
+ return -EOPNOTSUPP;
+
+ /* Descriptor window: BAR, offset, size and the advertised address. */
+ desc_block->bar =
+ FIELD_GET(PCI_EP_DMA_METADATA_CH_CTRL_DESC_BAR, ctrl);
+ lo = off + PCI_EP_DMA_METADATA_CH_DESC_OFF_LO;
+ hi = off + PCI_EP_DMA_METADATA_CH_DESC_OFF_HI;
+ ret = dw_edma_pcie_ep_dma_read_off(view, lo, hi,
+ &desc_block->off);
+ if (ret)
+ return ret;
+ field = off + PCI_EP_DMA_METADATA_CH_DESC_SIZE;
+ desc_block->sz = dw_edma_pcie_ep_dma_readl(view, field);
+ lo = off + PCI_EP_DMA_METADATA_CH_DESC_ADDR_LO;
+ hi = off + PCI_EP_DMA_METADATA_CH_DESC_ADDR_HI;
+ desc_block->paddr =
+ dw_edma_pcie_ep_dma_read64(view, lo, hi);
+ desc_block->paddr_valid = true;
+ if (!dw_edma_pcie_valid_block(view->pdev, desc_block))
+ return -EINVAL;
+
+ /* The auxiliary window is optional; mark it NO_BAR when absent. */
+ *data_block = (struct dw_edma_block) { .bar = NO_BAR };
+ if (!(ctrl & PCI_EP_DMA_METADATA_CH_CTRL_AUX_VALID))
+ continue;
+
+ data_block->bar =
+ FIELD_GET(PCI_EP_DMA_METADATA_CH_CTRL_AUX_BAR, ctrl);
+ lo = off + PCI_EP_DMA_METADATA_CH_AUX_OFF_LO;
+ hi = off + PCI_EP_DMA_METADATA_CH_AUX_OFF_HI;
+ ret = dw_edma_pcie_ep_dma_read_off(view, lo, hi,
+ &data_block->off);
+ if (ret)
+ return ret;
+ field = off + PCI_EP_DMA_METADATA_CH_AUX_SIZE;
+ data_block->sz = dw_edma_pcie_ep_dma_readl(view, field);
+ lo = off + PCI_EP_DMA_METADATA_CH_AUX_ADDR_LO;
+ hi = off + PCI_EP_DMA_METADATA_CH_AUX_ADDR_HI;
+ data_block->paddr =
+ dw_edma_pcie_ep_dma_read64(view, lo, hi);
+ data_block->paddr_valid = true;
+ if (!dw_edma_pcie_valid_block(view->pdev, data_block))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * Poll the metadata CTRL word until the READY bit is set.
+ *
+ * Sleeps DW_PCIE_EP_DMA_READY_POLL_US between reads and gives up after
+ * DW_PCIE_EP_DMA_READY_TIMEOUT_US. Returns the result of
+ * read_poll_timeout(): 0 once READY is observed, an error otherwise.
+ */
+static int
+dw_edma_pcie_ep_dma_wait_ready(struct dw_edma_pcie_ep_dma_view *view)
+{
+	u32 ctrl;
+	int ret;
+
+	ret = read_poll_timeout(dw_edma_pcie_ep_dma_readl, ctrl,
+				ctrl & PCI_EP_DMA_METADATA_CTRL_READY,
+				DW_PCIE_EP_DMA_READY_POLL_US,
+				DW_PCIE_EP_DMA_READY_TIMEOUT_US, false,
+				view, PCI_EP_DMA_METADATA_CTRL);
+
+	return ret;
+}
+
+/*
+ * Check the endpoint DMA metadata header exposed at view->base.
+ *
+ * Returns 0 when the metadata is usable, -ENODEV when the magic value is
+ * absent, -EOPNOTSUPP for an unsupported register layout or layout data, and
+ * -EINVAL for any other malformed field. On success the raw CTRL word and
+ * the register layout data are stored through @metadata_ctrl and
+ * @reg_layout_data when those pointers are non-NULL.
+ */
+static int
+dw_edma_pcie_validate_ep_dma_metadata(struct dw_edma_pcie_ep_dma_view *view,
+ u32 *metadata_ctrl, u8 *reg_layout_data)
+{
+ size_t table_size, table_end;
+ enum pci_barno reg_bar;
+ u16 len, entry_size;
+ u16 wr_ch_cnt, rd_ch_cnt;
+ u8 layout, layout_data;
+ u32 val;
+
+ /* No magic: return -ENODEV, distinct from the malformed-metadata codes. */
+ val = dw_edma_pcie_ep_dma_readl(view, 0);
+ if (val != PCI_EP_DMA_METADATA_MAGIC)
+ return -ENODEV;
+
+ val = dw_edma_pcie_ep_dma_readl(view, PCI_EP_DMA_METADATA_HDR);
+ if (FIELD_GET(PCI_EP_DMA_METADATA_HDR_REV, val) !=
+ PCI_EP_DMA_METADATA_REV)
+ return -EINVAL;
+
+ /* The advertised length must cover the header and fit within view->limit. */
+ len = FIELD_GET(PCI_EP_DMA_METADATA_HDR_LEN_FIELD, val);
+ if (len < PCI_EP_DMA_METADATA_HDR_LEN)
+ return -EINVAL;
+ if (len > view->limit)
+ return -EINVAL;
+
+ val = dw_edma_pcie_ep_dma_readl(view, PCI_EP_DMA_METADATA_REG_LAYOUT);
+ layout = FIELD_GET(PCI_EP_DMA_METADATA_REG_LAYOUT_ID, val);
+ if (layout != PCI_EP_DMA_METADATA_REG_LAYOUT_DW_EDMA)
+ return -EOPNOTSUPP;
+
+ /* Legacy eDMA and native HDMA are known map formats but not handled here. */
+ layout_data = FIELD_GET(PCI_EP_DMA_METADATA_REG_LAYOUT_DATA, val);
+ if (layout_data == EDMA_MF_EDMA_LEGACY ||
+ layout_data == EDMA_MF_HDMA_NATIVE)
+ return -EOPNOTSUPP;
+ if (layout_data != EDMA_MF_EDMA_UNROLL &&
+ layout_data != EDMA_MF_HDMA_COMPAT)
+ return -EINVAL;
+
+ val = dw_edma_pcie_ep_dma_readl(view, PCI_EP_DMA_METADATA_CTRL);
+ reg_bar = FIELD_GET(PCI_EP_DMA_METADATA_CTRL_REG_BAR, val);
+ if (!dw_edma_pcie_valid_bar(reg_bar))
+ return -EINVAL;
+
+ wr_ch_cnt = FIELD_GET(PCI_EP_DMA_METADATA_CTRL_WR_CH_COUNT, val);
+ rd_ch_cnt = FIELD_GET(PCI_EP_DMA_METADATA_CTRL_RD_CH_COUNT, val);
+ if (!wr_ch_cnt && !rd_ch_cnt)
+ return -EINVAL;
+ if (wr_ch_cnt > EDMA_MAX_WR_CH || rd_ch_cnt > EDMA_MAX_RD_CH)
+ return -EINVAL;
+
+ entry_size = FIELD_GET(PCI_EP_DMA_METADATA_CTRL_CH_ENTRY_SIZE, val);
+ if (entry_size < PCI_EP_DMA_METADATA_CH_ENTRY_SIZE ||
+ entry_size % sizeof(u32))
+ return -EINVAL;
+
+ /* Header plus all write and read entries must fit in the advertised length. */
+ if (check_mul_overflow((size_t)(wr_ch_cnt + rd_ch_cnt),
+ (size_t)entry_size, &table_size) ||
+ check_add_overflow((size_t)PCI_EP_DMA_METADATA_HDR_LEN,
+ table_size, &table_end) ||
+ table_end > len)
+ return -EINVAL;
+
+ if (metadata_ctrl)
+ *metadata_ctrl = val;
+ if (reg_layout_data)
+ *reg_layout_data = layout_data;
+
+ return 0;
+}
+
+/*
+ * Translate endpoint DMA metadata into @pdata.
+ *
+ * Validates the metadata seen through @view, then records the register
+ * layout, the DMA register window and the per-channel blocks. The channel
+ * counts already present in @pdata act as upper bounds on what the metadata
+ * advertises.
+ *
+ * Returns 0 on success, the error from metadata validation, -EINVAL when the
+ * register window does not fit its BAR, or the error from channel-table
+ * parsing.
+ */
+static int
+dw_edma_pcie_parse_ep_dma_data(struct dw_edma_pcie_ep_dma_view *view,
+			       struct dw_edma_pcie_data *pdata)
+{
+	u32 ctrl, reg_sz;
+	u8 reg_layout_data;
+	u64 reg_off;
+	u16 wr_table, rd_table, entry_size;
+	u16 wr_ch_cnt, rd_ch_cnt;
+	int ret;
+
+	ret = dw_edma_pcie_validate_ep_dma_metadata(view, &ctrl,
+						    &reg_layout_data);
+	if (ret)
+		return ret;
+
+	pci_dbg(view->pdev, "Detected PCI endpoint DMA BAR metadata\n");
+
+	pdata->mf = reg_layout_data;
+	pdata->rg.bar = FIELD_GET(PCI_EP_DMA_METADATA_CTRL_REG_BAR, ctrl);
+
+	/* Never use more channels than the template in @pdata allows. */
+	wr_ch_cnt = FIELD_GET(PCI_EP_DMA_METADATA_CTRL_WR_CH_COUNT, ctrl);
+	rd_ch_cnt = FIELD_GET(PCI_EP_DMA_METADATA_CTRL_RD_CH_COUNT, ctrl);
+	pdata->wr_ch_cnt = min_t(u16, pdata->wr_ch_cnt, wr_ch_cnt);
+	pdata->rd_ch_cnt = min_t(u16, pdata->rd_ch_cnt, rd_ch_cnt);
+	pdata->irqs = pdata->wr_ch_cnt + pdata->rd_ch_cnt;
+
+	reg_off = dw_edma_pcie_ep_dma_read64(view,
+					     PCI_EP_DMA_METADATA_REG_OFF_LO,
+					     PCI_EP_DMA_METADATA_REG_OFF_HI);
+	reg_sz = dw_edma_pcie_ep_dma_readl(view, PCI_EP_DMA_METADATA_REG_SIZE);
+	if (reg_off > type_max(pdata->rg.off) ||
+	    !dw_edma_pcie_valid_bar_range(view->pdev, pdata->rg.bar,
+					  reg_off, reg_sz))
+		return -EINVAL;
+	pdata->rg.off = reg_off;
+	pdata->rg.sz = reg_sz;
+
+	/*
+	 * The read table starts after every advertised write entry, so its
+	 * offset uses the metadata's write count, not the clamped one.
+	 */
+	entry_size = FIELD_GET(PCI_EP_DMA_METADATA_CTRL_CH_ENTRY_SIZE, ctrl);
+	wr_table = PCI_EP_DMA_METADATA_HDR_LEN;
+	rd_table = PCI_EP_DMA_METADATA_HDR_LEN + wr_ch_cnt * entry_size;
+
+	ret = dw_edma_pcie_parse_ep_dma_ch_table(view, pdata, wr_table,
+						 entry_size, pdata->wr_ch_cnt,
+						 true);
+	if (ret)
+		return ret;
+
+	return dw_edma_pcie_parse_ep_dma_ch_table(view, pdata, rd_table,
+						  entry_size,
+						  pdata->rd_ch_cnt, false);
+}
+
+/*
+ * Scan the standard BARs of @pdev for endpoint DMA metadata and, on the
+ * first BAR that carries it, run the HOST_REQ/READY handshake and fill
+ * @pdata from the metadata.
+ *
+ * BARs that are not scannable, cannot be mapped, or lack the metadata magic
+ * are skipped. Returns 0 on success, -ENODEV when no BAR carries metadata,
+ * or the first validation, handshake or parsing error.
+ */
+static int
+dw_edma_pcie_parse_ep_dma_caps(struct pci_dev *pdev,
+			       struct dw_edma_pcie_data *pdata)
+{
+	struct dw_edma_pcie_ep_dma_view view;
+	void __iomem *mmio;
+	enum pci_barno bar;
+	u32 ctrl;
+	int ret;
+
+	for (bar = BAR_0; bar < PCI_STD_NUM_BARS; bar++) {
+		if (!dw_edma_pcie_ep_dma_bar_scannable(pdev, bar))
+			continue;
+
+		mmio = pci_iomap_range(pdev, bar, 0, 0);
+		if (!mmio)
+			continue;
+
+		view = (struct dw_edma_pcie_ep_dma_view) {
+			.pdev = pdev,
+			.base = mmio,
+			.limit = pci_resource_len(pdev, bar),
+		};
+
+		ret = dw_edma_pcie_validate_ep_dma_metadata(&view, NULL, NULL);
+		if (ret) {
+			pci_iounmap(pdev, mmio);
+			/* Only a missing magic lets the scan move on. */
+			if (ret == -ENODEV)
+				continue;
+			return ret;
+		}
+
+		/* Raise HOST_REQ and wait for the endpoint to answer READY. */
+		ctrl = dw_edma_pcie_ep_dma_readl(&view,
+						 PCI_EP_DMA_METADATA_CTRL);
+		ctrl |= PCI_EP_DMA_METADATA_CTRL_HOST_REQ;
+		dw_edma_pcie_ep_dma_writel(&view, PCI_EP_DMA_METADATA_CTRL,
+					   ctrl);
+
+		ret = dw_edma_pcie_ep_dma_wait_ready(&view);
+		if (!ret)
+			ret = dw_edma_pcie_parse_ep_dma_data(&view, pdata);
+
+		pci_iounmap(pdev, mmio);
+
+		return ret;
+	}
+
+	return -ENODEV;
+}
+
static int
dw_edma_pcie_parse_synopsys_caps(struct pci_dev *pdev,
struct dw_edma_pcie_data *pdata)
@@ -354,6 +713,15 @@ dw_edma_pcie_parse_xilinx_caps(struct pci_dev *pdev,
return 0;
}
+/*
+ * Match data for endpoints discovered through BAR metadata. The probe path
+ * also falls back to this entry when the matched ID carries no driver_data.
+ */
+static const struct dw_edma_pcie_match_data ep_dma_match_data = {
+ .data = &ep_dma_data,
+ .plat_ops = &dw_edma_pcie_raw_addr_plat_ops,
+ .parse_caps = dw_edma_pcie_parse_ep_dma_caps,
+ .flags = DW_EDMA_PCIE_F_REG_OFFSET,
+ .chip_flags = DW_EDMA_CHIP_PARTIAL,
+ .default_irq_mode = DW_EDMA_CH_IRQ_REMOTE,
+};
+
static u64 dw_edma_get_phys_addr(struct pci_dev *pdev,
const struct dw_edma_pcie_match_data *match,
struct dw_edma_pcie_data *pdata,
@@ -381,7 +749,7 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev,
const struct pci_device_id *pid)
{
const struct dw_edma_pcie_match_data *match = (void *)pid->driver_data;
- const struct dw_edma_pcie_data *pdata = match->data;
+ const struct dw_edma_pcie_data *pdata;
struct device *dev = &pdev->dev;
struct dw_edma_chip *chip;
int err, nr_irqs;
@@ -394,6 +762,10 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev,
return err;
}
+ if (!match)
+ match = &ep_dma_match_data;
+ pdata = match->data;
+
struct dw_edma_pcie_data *dma_data __free(kfree) =
kmemdup(pdata, sizeof(*dma_data), GFP_KERNEL);
if (!dma_data)
--
2.51.0