[PATCH 13/15] dmaengine: dw-edma: Add auxiliary-bus frontend for exported eDMA

From: Koichiro Den

Date: Thu Mar 12 2026 - 12:58:10 EST


Add a small auxiliary-bus frontend that binds to the DMA child created
by ntb_hw_epf when a peer advertises exported DMA ABI v1.

The frontend reads the ABI header from the exported BAR slice, wires the
controller register window and per-channel descriptor windows into a
struct dw_edma_chip, and then calls into the common dw-edma core.

Use the parent PCI device as chip->dev so IRQ lookup and MSI message
composition continue to use the vectors owned by the NTB PCI function.
Default delegated channels to remote interrupt delivery.

Signed-off-by: Koichiro Den <den@xxxxxxxxxxxxx>
---
drivers/dma/dw-edma/Kconfig | 11 ++
drivers/dma/dw-edma/Makefile | 1 +
drivers/dma/dw-edma/dw-edma-aux.c | 297 ++++++++++++++++++++++++++++++
3 files changed, 309 insertions(+)
create mode 100644 drivers/dma/dw-edma/dw-edma-aux.c

diff --git a/drivers/dma/dw-edma/Kconfig b/drivers/dma/dw-edma/Kconfig
index 2b6f2679508d..a31f6bd784c2 100644
--- a/drivers/dma/dw-edma/Kconfig
+++ b/drivers/dma/dw-edma/Kconfig
@@ -19,4 +19,15 @@ config DW_EDMA_PCIE
eDMA controller and an endpoint PCIe device. This also serves
as a reference design to whom desires to use this IP.

+config DW_EDMA_AUX
+ tristate "Synopsys DesignWare eDMA auxiliary-bus frontend"
+ depends on AUXILIARY_BUS
+ help
+ Build a frontend for an endpoint-integrated Synopsys
+ DesignWare eDMA controller discovered through an auxiliary
+ device, such as the child created by ntb_hw_epf for an
+ endpoint DMA engine exported through vNTB. The driver maps the
+ exported control and descriptor windows and registers the
+ remote engine with DMAEngine.
+
endif # DW_EDMA
diff --git a/drivers/dma/dw-edma/Makefile b/drivers/dma/dw-edma/Makefile
index 83ab58f87760..2545ce4a1989 100644
--- a/drivers/dma/dw-edma/Makefile
+++ b/drivers/dma/dw-edma/Makefile
@@ -7,3 +7,4 @@ dw-edma-objs := dw-edma-core.o \
dw-edma-v0-core.o \
dw-hdma-v0-core.o $(dw-edma-y)
obj-$(CONFIG_DW_EDMA_PCIE) += dw-edma-pcie.o
+obj-$(CONFIG_DW_EDMA_AUX) += dw-edma-aux.o
diff --git a/drivers/dma/dw-edma/dw-edma-aux.c b/drivers/dma/dw-edma/dw-edma-aux.c
new file mode 100644
index 000000000000..a7481fa289d0
--- /dev/null
+++ b/drivers/dma/dw-edma/dw-edma-aux.c
@@ -0,0 +1,297 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Synopsys DesignWare eDMA auxiliary-bus frontend
+ */
+
+#include <linux/auxiliary_bus.h>
+#include <linux/device.h>
+#include <linux/dma/edma.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/pci-ep-dma.h>
+#include <linux/property.h>
+
+/* Per-bound-device state for one exported eDMA instance. */
+struct dw_edma_aux {
+ struct list_head node; /* entry in dw_edma_aux_list */
+ struct pci_dev *pdev; /* parent NTB PCI function owning the BARs/vectors */
+ struct dw_edma_chip chip; /* chip descriptor handed to the dw-edma core */
+ void __iomem *bar_base; /* mapping of the exported DMA window (locator + descriptors) */
+ void __iomem *ctrl_base; /* separate ctrl mapping, or NULL if inside bar_base */
+ u32 irq_base; /* first delegated MSI vector on pdev */
+};
+
+/*
+ * Global registry mapping parent PCI devices to frontend instances, used by
+ * the irq_vector callback which only receives the parent struct device.
+ */
+static DEFINE_MUTEX(dw_edma_aux_lock);
+static LIST_HEAD(dw_edma_aux_list);
+
+/*
+ * Look up the frontend instance registered for @pdev, or NULL if none.
+ * Caller must hold dw_edma_aux_lock.
+ */
+static struct dw_edma_aux *dw_edma_aux_find_by_pdev(struct pci_dev *pdev)
+{
+	struct dw_edma_aux *cur;
+
+	list_for_each_entry(cur, &dw_edma_aux_list, node) {
+		if (cur->pdev == pdev)
+			return cur;
+	}
+
+	return NULL;
+}
+
+/*
+ * Add @aux to the global registry. Refuses a second instance for the same
+ * parent PCI function. Returns 0 on success, -EEXIST on duplicate.
+ */
+static int dw_edma_aux_register(struct dw_edma_aux *aux)
+{
+	int ret;
+
+	mutex_lock(&dw_edma_aux_lock);
+	ret = dw_edma_aux_find_by_pdev(aux->pdev) ? -EEXIST : 0;
+	if (!ret)
+		list_add_tail(&aux->node, &dw_edma_aux_list);
+	mutex_unlock(&dw_edma_aux_lock);
+
+	return ret;
+}
+
+/*
+ * Remove @aux from the global registry. Safe to call even if registration
+ * never happened or was already undone: the node is list_del_init()'d, so
+ * list_empty() detects the detached state.
+ */
+static void dw_edma_aux_unregister(struct dw_edma_aux *aux)
+{
+ mutex_lock(&dw_edma_aux_lock);
+ if (!list_empty(&aux->node))
+ list_del_init(&aux->node);
+ mutex_unlock(&dw_edma_aux_lock);
+}
+
+/*
+ * Check that the non-empty range [@off, @off + @sz) lies entirely inside the
+ * exported window [@dma_off, @dma_off + @dma_sz). All arithmetic is done on
+ * the relative start so no u32 addition can overflow.
+ */
+static bool dw_edma_aux_region_valid(u32 dma_off, u32 dma_sz, u32 off, u32 sz)
+{
+	u32 start;
+
+	if (!sz || off < dma_off)
+		return false;
+
+	start = off - dma_off;
+
+	return start <= dma_sz && dma_sz - start >= sz;
+}
+
+/*
+ * dw_edma_plat_ops::irq_vector callback. @dev is the parent PCI device
+ * (chip->dev), so translate it back to the registered frontend and offset
+ * the requested vector by the delegated MSI base.
+ */
+static int dw_edma_aux_irq_vector(struct device *dev, unsigned int nr)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct dw_edma_aux *aux;
+	int irq;
+
+	mutex_lock(&dw_edma_aux_lock);
+	aux = dw_edma_aux_find_by_pdev(pdev);
+	irq = aux ? pci_irq_vector(pdev, aux->irq_base + nr) : -ENODEV;
+	mutex_unlock(&dw_edma_aux_lock);
+
+	return irq;
+}
+
+/* irq_vector is the only platform hook the dw-edma core needs here. */
+static const struct dw_edma_plat_ops dw_edma_aux_plat_ops = {
+ .irq_vector = dw_edma_aux_irq_vector,
+};
+
+/*
+ * Bind to the "ep_dma_v1" child created by ntb_hw_epf.
+ *
+ * Validate the auxiliary-device properties against the ABI v1 header placed
+ * at the start of the exported BAR slice, map the eDMA control window and
+ * the per-channel descriptor windows into a struct dw_edma_chip, and hand
+ * the chip to the common dw-edma core.
+ *
+ * Returns 0 on success or a negative errno; all mappings are undone on any
+ * failure path.
+ */
+static int dw_edma_aux_probe(struct auxiliary_device *auxdev,
+			     const struct auxiliary_device_id *id)
+{
+	void __iomem *base, *ctrl_base = NULL, *reg_base = NULL;
+	struct device *dev = &auxdev->dev;
+	struct pci_dev *pdev = to_pci_dev(dev->parent);
+	struct pci_ep_dma_hdr_v1 hdr;
+	struct dw_edma_chip *chip;
+	struct dw_edma_aux *aux;
+	u32 dma_abi, dma_bar, dma_offset, dma_size;
+	u32 desc_bar, desc_offset, desc_size;
+	u32 ctrl_bar, ctrl_offset, ctrl_size;
+	u32 irq_base, irq_count, num_chans;
+	u32 total_size, hdr_irq_count;
+	u16 hdr_size;
+	unsigned int i;
+	int ret;
+
+	ret = device_property_read_u32(dev, "dma-abi", &dma_abi);
+	if (ret)
+		return ret;
+	if (dma_abi != PCI_EP_DMA_ABI_V1)
+		return -EINVAL;
+
+	ret = device_property_read_u32(dev, "dma-bar", &dma_bar);
+	if (ret)
+		return ret;
+	ret = device_property_read_u32(dev, "dma-offset", &dma_offset);
+	if (ret)
+		return ret;
+	ret = device_property_read_u32(dev, "dma-size", &dma_size);
+	if (ret)
+		return ret;
+	if (dma_bar > BAR_5 || !dma_size)
+		return -EINVAL;
+	/* The window must at least hold the locator header itself. */
+	if (dma_size < sizeof(hdr))
+		return -EINVAL;
+	ret = device_property_read_u32(dev, "dma-irq-base", &irq_base);
+	if (ret)
+		return ret;
+	ret = device_property_read_u32(dev, "dma-irq-count", &irq_count);
+	if (ret)
+		return ret;
+	if (!irq_count)
+		return -EINVAL;
+
+	base = pci_iomap_range(pdev, dma_bar, dma_offset, dma_size);
+	if (!base)
+		return -ENOMEM;
+
+	/* Snapshot the header and cross-check it against the properties. */
+	memcpy_fromio(&hdr, base, sizeof(hdr));
+	if (le32_to_cpu(hdr.magic) != PCI_EP_DMA_MAGIC) {
+		ret = -EINVAL;
+		goto err_iounmap;
+	}
+	if (le16_to_cpu(hdr.version) != 1) {
+		ret = -EINVAL;
+		goto err_iounmap;
+	}
+
+	hdr_size = le16_to_cpu(hdr.hdr_size);
+	/*
+	 * total_size is a 32-bit header field; keep it in a u32 so the
+	 * comparison against dma_size is not silently truncated to 16 bits
+	 * for windows of 64 KiB and larger.
+	 */
+	total_size = le32_to_cpu(hdr.total_size);
+	hdr_irq_count = le32_to_cpu(hdr.irq_count);
+	if (hdr_size != sizeof(hdr) || total_size != dma_size ||
+	    hdr_irq_count != irq_count) {
+		ret = -EINVAL;
+		goto err_iounmap;
+	}
+
+	ctrl_bar = le32_to_cpu(hdr.ctrl_bar);
+	ctrl_offset = le32_to_cpu(hdr.ctrl_offset);
+	ctrl_size = le32_to_cpu(hdr.ctrl_size);
+	num_chans = le32_to_cpu(hdr.num_chans);
+	/* Every channel needs a dedicated vector out of the delegated range. */
+	if (!num_chans || num_chans > PCI_EP_DMA_MAX_CHANS ||
+	    num_chans > irq_count) {
+		ret = -EINVAL;
+		goto err_iounmap;
+	}
+
+	if (!ctrl_size || ctrl_bar > BAR_5) {
+		ret = -EINVAL;
+		goto err_iounmap;
+	}
+
+	/*
+	 * The exported DMA window is only guaranteed to cover the locator and
+	 * descriptor subranges. The live eDMA register block may be advertised
+	 * through another BAR, or elsewhere in the same BAR, and must then be
+	 * mapped directly.
+	 */
+	if (ctrl_bar == dma_bar &&
+	    dw_edma_aux_region_valid(dma_offset, dma_size, ctrl_offset,
+				     ctrl_size)) {
+		ctrl_base = NULL;
+		reg_base = base + (ctrl_offset - dma_offset);
+	} else {
+		ctrl_base = pci_iomap_range(pdev, ctrl_bar, ctrl_offset,
+					    ctrl_size);
+		if (!ctrl_base) {
+			ret = -ENOMEM;
+			goto err_iounmap;
+		}
+		reg_base = ctrl_base;
+	}
+
+	/* All descriptor windows must lie inside the mapped DMA window. */
+	for (i = 0; i < num_chans; i++) {
+		desc_bar = le32_to_cpu(hdr.chans[i].desc_bar);
+		desc_offset = le32_to_cpu(hdr.chans[i].desc_offset);
+		desc_size = le32_to_cpu(hdr.chans[i].desc_size);
+		if (desc_bar != dma_bar ||
+		    !dw_edma_aux_region_valid(dma_offset, dma_size, desc_offset,
+					      desc_size)) {
+			ret = -EINVAL;
+			goto err_ctrl_iounmap;
+		}
+	}
+
+	aux = devm_kzalloc(dev, sizeof(*aux), GFP_KERNEL);
+	if (!aux) {
+		ret = -ENOMEM;
+		goto err_ctrl_iounmap;
+	}
+
+	INIT_LIST_HEAD(&aux->node);
+	aux->pdev = pdev;
+	aux->bar_base = base;
+	aux->ctrl_base = ctrl_base;
+	aux->irq_base = irq_base;
+
+	ret = dw_edma_aux_register(aux);
+	if (ret)
+		goto err_ctrl_iounmap;
+
+	chip = &aux->chip;
+	/*
+	 * Use the parent PCI device so IRQ lookup and MSI message composition
+	 * keep using the vectors owned by the NTB PCI function.
+	 */
+	chip->dev = dev->parent;
+	chip->nr_irqs = irq_count;
+	chip->ops = &dw_edma_aux_plat_ops;
+	chip->flags = 0;
+	chip->mf = EDMA_MF_EDMA_UNROLL;
+	chip->default_irq_mode = DW_EDMA_CH_IRQ_REMOTE;
+	chip->reg_base = reg_base;
+	chip->ll_wr_cnt = 0;
+	/*
+	 * ABI v1 exports READ channels only. @hdr->chans[] is defined as a dense
+	 * prefix of the remote hardware READ-channel space, ordered by remote
+	 * hardware READ-channel index starting at 0, so ll_region_rd[i] can be
+	 * consumed as local READ channel i.
+	 */
+	chip->ll_rd_cnt = num_chans;
+	for (i = 0; i < num_chans; i++) {
+		desc_offset = le32_to_cpu(hdr.chans[i].desc_offset);
+		desc_size = le32_to_cpu(hdr.chans[i].desc_size);
+		chip->ll_region_rd[i].vaddr.io = base + (desc_offset - dma_offset);
+		chip->ll_region_rd[i].paddr = le64_to_cpu(hdr.chans[i].desc_phys_addr);
+		chip->ll_region_rd[i].sz = desc_size;
+	}
+
+	dev_set_drvdata(dev, aux);
+	ret = dw_edma_probe(chip);
+	if (ret) {
+		dw_edma_aux_unregister(aux);
+		goto err_ctrl_iounmap;
+	}
+
+	return 0;
+
+err_ctrl_iounmap:
+	if (ctrl_base)
+		pci_iounmap(pdev, ctrl_base);
+err_iounmap:
+	pci_iounmap(pdev, base);
+	return ret;
+}
+
+/*
+ * Tear down in reverse order of probe: unplug the chip from the dw-edma
+ * core, drop the registry entry, then release both BAR mappings. drvdata
+ * is only set once probe fully succeeded, so bail out if it is absent.
+ */
+static void dw_edma_aux_remove(struct auxiliary_device *auxdev)
+{
+	struct dw_edma_aux *aux = dev_get_drvdata(&auxdev->dev);
+	struct pci_dev *pdev;
+
+	if (!aux)
+		return;
+
+	pdev = aux->pdev;
+
+	dw_edma_remove(&aux->chip);
+	dw_edma_aux_unregister(aux);
+
+	if (aux->ctrl_base)
+		pci_iounmap(pdev, aux->ctrl_base);
+	pci_iounmap(pdev, aux->bar_base);
+}
+
+static const struct auxiliary_device_id dw_edma_aux_ids[] = {
+ { .name = "ntb_hw_epf.ep_dma_v1" },
+ { }
+};
+MODULE_DEVICE_TABLE(auxiliary, dw_edma_aux_ids);
+
+/* Auxiliary-bus glue; module init/exit are generated by the macro below. */
+static struct auxiliary_driver dw_edma_aux_driver = {
+ .name = "dw_edma_aux",
+ .probe = dw_edma_aux_probe,
+ .remove = dw_edma_aux_remove,
+ .id_table = dw_edma_aux_ids,
+};
+module_auxiliary_driver(dw_edma_aux_driver);
+
+MODULE_DESCRIPTION("Synopsys DesignWare eDMA auxiliary-bus frontend");
+MODULE_AUTHOR("Koichiro Den <den@xxxxxxxxxxxxx>");
+MODULE_LICENSE("GPL");
--
2.51.0