[PATCH 1/3] dmaengine: Add multi-buffer support in single DMA transfer
From: Sumit Kumar
Date: Fri Mar 13 2026 - 02:51:20 EST
Add the dmaengine_prep_batch_sg_dma() API for batching multiple
independent buffers in a single DMA transaction. Each scatter-gather
entry specifies both source and destination addresses, so multiple
non-contiguous memory regions can be transferred in one DMA
transaction instead of separate operations, reducing both submission
and interrupt overhead.
Extend struct scatterlist with an optional dma_dst_address field (gated
by the new CONFIG_NEED_SG_DMA_DST_ADDR symbol) and implement support in
the dw-edma driver.
Signed-off-by: Sumit Kumar <sumit.kumar@xxxxxxxxxxxxxxxx>
---
drivers/dma/dw-edma/Kconfig | 1 +
drivers/dma/dw-edma/dw-edma-core.c | 40 ++++++++++++++++++++++++++++++++++----
drivers/dma/dw-edma/dw-edma-core.h | 3 ++-
include/linux/dmaengine.h | 29 ++++++++++++++++++++++++++-
include/linux/scatterlist.h | 7 +++++++
kernel/dma/Kconfig | 3 +++
6 files changed, 77 insertions(+), 6 deletions(-)
diff --git a/drivers/dma/dw-edma/Kconfig b/drivers/dma/dw-edma/Kconfig
index 2b6f2679508d93b94b7efecd4e36d5902f7b4c99..0472a6554ff38d4cf172a90b6bf0bdaa9e7f4b95 100644
--- a/drivers/dma/dw-edma/Kconfig
+++ b/drivers/dma/dw-edma/Kconfig
@@ -5,6 +5,7 @@ config DW_EDMA
depends on PCI && PCI_MSI
select DMA_ENGINE
select DMA_VIRTUAL_CHANNELS
+ select NEED_SG_DMA_DST_ADDR
help
Support the Synopsys DesignWare eDMA controller, normally
implemented on endpoints SoCs.
diff --git a/drivers/dma/dw-edma/dw-edma-core.c b/drivers/dma/dw-edma/dw-edma-core.c
index 8e5f7defa6b678eefe0f312ebc59f654677c744f..04314cfd82edbed6ed3665eb4c8e6b428339c207 100644
--- a/drivers/dma/dw-edma/dw-edma-core.c
+++ b/drivers/dma/dw-edma/dw-edma-core.c
@@ -411,6 +411,9 @@ dw_edma_device_transfer(struct dw_edma_transfer *xfer)
return NULL;
if (!xfer->xfer.il->src_inc || !xfer->xfer.il->dst_inc)
return NULL;
+ } else if (xfer->type == EDMA_XFER_DUAL_ADDR_SG) {
+ if (xfer->xfer.sg.len < 1)
+ return NULL;
} else {
return NULL;
}
@@ -438,7 +441,7 @@ dw_edma_device_transfer(struct dw_edma_transfer *xfer)
if (xfer->type == EDMA_XFER_CYCLIC) {
cnt = xfer->xfer.cyclic.cnt;
- } else if (xfer->type == EDMA_XFER_SCATTER_GATHER) {
+ } else if (xfer->type == EDMA_XFER_SCATTER_GATHER || xfer->type == EDMA_XFER_DUAL_ADDR_SG) {
cnt = xfer->xfer.sg.len;
sg = xfer->xfer.sg.sgl;
} else if (xfer->type == EDMA_XFER_INTERLEAVED) {
@@ -447,7 +450,8 @@ dw_edma_device_transfer(struct dw_edma_transfer *xfer)
}
for (i = 0; i < cnt; i++) {
- if (xfer->type == EDMA_XFER_SCATTER_GATHER && !sg)
+ if ((xfer->type == EDMA_XFER_SCATTER_GATHER ||
+ xfer->type == EDMA_XFER_DUAL_ADDR_SG) && !sg)
break;
if (chunk->bursts_alloc == chan->ll_max) {
@@ -462,7 +466,8 @@ dw_edma_device_transfer(struct dw_edma_transfer *xfer)
if (xfer->type == EDMA_XFER_CYCLIC)
burst->sz = xfer->xfer.cyclic.len;
- else if (xfer->type == EDMA_XFER_SCATTER_GATHER)
+ else if (xfer->type == EDMA_XFER_SCATTER_GATHER ||
+ xfer->type == EDMA_XFER_DUAL_ADDR_SG)
burst->sz = sg_dma_len(sg);
else if (xfer->type == EDMA_XFER_INTERLEAVED)
burst->sz = xfer->xfer.il->sgl[i % fsz].size;
@@ -486,6 +491,9 @@ dw_edma_device_transfer(struct dw_edma_transfer *xfer)
*/
} else if (xfer->type == EDMA_XFER_INTERLEAVED) {
burst->dar = dst_addr;
+ } else if (xfer->type == EDMA_XFER_DUAL_ADDR_SG) {
+ burst->sar = dw_edma_get_pci_address(chan, sg_dma_address(sg));
+ burst->dar = sg_dma_dst_address(sg);
}
} else {
burst->dar = dst_addr;
@@ -503,10 +511,14 @@ dw_edma_device_transfer(struct dw_edma_transfer *xfer)
*/
} else if (xfer->type == EDMA_XFER_INTERLEAVED) {
burst->sar = src_addr;
+ } else if (xfer->type == EDMA_XFER_DUAL_ADDR_SG) {
+ burst->sar = sg_dma_address(sg);
+ burst->dar = dw_edma_get_pci_address(chan, sg_dma_dst_address(sg));
}
}
- if (xfer->type == EDMA_XFER_SCATTER_GATHER) {
+ if (xfer->type == EDMA_XFER_SCATTER_GATHER ||
+ xfer->type == EDMA_XFER_DUAL_ADDR_SG) {
sg = sg_next(sg);
} else if (xfer->type == EDMA_XFER_INTERLEAVED) {
struct dma_interleaved_template *il = xfer->xfer.il;
@@ -603,6 +615,25 @@ static void dw_hdma_set_callback_result(struct virt_dma_desc *vd,
res->residue = residue;
}
+static struct dma_async_tx_descriptor *
+dw_edma_device_prep_batch_sg_dma(struct dma_chan *dchan,
+ struct scatterlist *sg,
+ unsigned int nents,
+ enum dma_transfer_direction direction,
+ unsigned long flags)
+{
+ struct dw_edma_transfer xfer;
+
+ xfer.dchan = dchan;
+ xfer.direction = direction;
+ xfer.xfer.sg.sgl = sg;
+ xfer.xfer.sg.len = nents;
+ xfer.flags = flags;
+ xfer.type = EDMA_XFER_DUAL_ADDR_SG;
+
+ return dw_edma_device_transfer(&xfer);
+}
+
static void dw_edma_done_interrupt(struct dw_edma_chan *chan)
{
struct dw_edma_desc *desc;
@@ -818,6 +849,7 @@ static int dw_edma_channel_setup(struct dw_edma *dw, u32 wr_alloc, u32 rd_alloc)
dma->device_prep_slave_sg = dw_edma_device_prep_slave_sg;
dma->device_prep_dma_cyclic = dw_edma_device_prep_dma_cyclic;
dma->device_prep_interleaved_dma = dw_edma_device_prep_interleaved_dma;
+ dma->device_prep_batch_sg_dma = dw_edma_device_prep_batch_sg_dma;
dma_set_max_seg_size(dma->dev, U32_MAX);
diff --git a/drivers/dma/dw-edma/dw-edma-core.h b/drivers/dma/dw-edma/dw-edma-core.h
index 71894b9e0b1539c636171738963e80a0a5ef43a4..1a266dc58315edb3d5fd9eddb19fc350f1ed9a1b 100644
--- a/drivers/dma/dw-edma/dw-edma-core.h
+++ b/drivers/dma/dw-edma/dw-edma-core.h
@@ -36,7 +36,8 @@ enum dw_edma_status {
enum dw_edma_xfer_type {
EDMA_XFER_SCATTER_GATHER = 0,
EDMA_XFER_CYCLIC,
- EDMA_XFER_INTERLEAVED
+ EDMA_XFER_INTERLEAVED,
+ EDMA_XFER_DUAL_ADDR_SG,
};
struct dw_edma_chan;
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 99efe2b9b4ea9844ca6161208362ef18ef111d96..fdba75b5c40f805904a6697fce3062303fea762a 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -939,7 +939,11 @@ struct dma_device {
size_t period_len, enum dma_transfer_direction direction,
unsigned long flags);
struct dma_async_tx_descriptor *(*device_prep_interleaved_dma)(
- struct dma_chan *chan, struct dma_interleaved_template *xt,
+ struct dma_chan *chan, struct dma_interleaved_template *xt,
+ unsigned long flags);
+ struct dma_async_tx_descriptor *(*device_prep_batch_sg_dma)
+ (struct dma_chan *chan, struct scatterlist *sg, unsigned int nents,
+ enum dma_transfer_direction direction,
unsigned long flags);
void (*device_caps)(struct dma_chan *chan, struct dma_slave_caps *caps);
@@ -1060,6 +1064,29 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_interleaved_dma(
return chan->device->device_prep_interleaved_dma(chan, xt, flags);
}
+/**
+ * dmaengine_prep_batch_sg_dma() - Prepare a single DMA transfer for multiple independent buffers
+ * @chan: DMA channel
+ * @sg: Scatter-gather list; each entry carries both a source (dma_address) and a destination (dma_dst_address)
+ * @nents: Number of entries in @sg
+ * @direction: Transfer direction (DMA_MEM_TO_MEM, DMA_DEV_TO_MEM or DMA_MEM_TO_DEV)
+ * @flags: DMA engine flags
+ *
+ * Batch multiple independent copies into a single DMA transaction.
+ * Requires CONFIG_NEED_SG_DMA_DST_ADDR to be enabled.
+ * Return: a prepared descriptor, or NULL on error or if unsupported
+ */
+static inline struct dma_async_tx_descriptor *dmaengine_prep_batch_sg_dma
+ (struct dma_chan *chan, struct scatterlist *sg, unsigned int nents,
+ enum dma_transfer_direction direction, unsigned long flags)
+{
+ if (!chan || !chan->device || !chan->device->device_prep_batch_sg_dma ||
+ !sg || !nents)
+ return NULL;
+
+ return chan->device->device_prep_batch_sg_dma(chan, sg, nents, direction, flags);
+}
+
/**
* dmaengine_prep_dma_memset() - Prepare a DMA memset descriptor.
* @chan: The channel to be used for this descriptor
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 29f6ceb98d74b118d08b6a3d4eb7f62dcde0495d..20b65ffcd5e2a65ec5026a29344caf6baa09700b 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -19,6 +19,9 @@ struct scatterlist {
#ifdef CONFIG_NEED_SG_DMA_FLAGS
unsigned int dma_flags;
#endif
+#ifdef CONFIG_NEED_SG_DMA_DST_ADDR
+ dma_addr_t dma_dst_address;
+#endif
};
/*
@@ -36,6 +39,10 @@ struct scatterlist {
#define sg_dma_len(sg) ((sg)->length)
#endif
+#ifdef CONFIG_NEED_SG_DMA_DST_ADDR
+#define sg_dma_dst_address(sg) ((sg)->dma_dst_address)
+#endif
+
struct sg_table {
struct scatterlist *sgl; /* the list */
unsigned int nents; /* number of mapped entries */
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 31cfdb6b4bc3e33c239111955d97b3ec160baafa..3539b5b1efe27be7ccbfebb358dbb9cad2868f11 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -32,6 +32,9 @@ config NEED_SG_DMA_LENGTH
config NEED_DMA_MAP_STATE
bool
+config NEED_SG_DMA_DST_ADDR
+ bool
+
config ARCH_DMA_ADDR_T_64BIT
def_bool 64BIT || PHYS_ADDR_T_64BIT
--
2.34.1