[PATCH 2/3] PCI: epf-mhi: Add batched DMA read support

From: Sumit Kumar

Date: Fri Mar 13 2026 - 02:51:52 EST


Add support for batched DMA transfers in the PCI EPF MHI driver to
improve performance when reading multiple buffers from the host.

Implement two variants of the read_batch() callback:
- pci_epf_mhi_edma_read_batch(): DMA-optimized implementation that uses
  dmaengine_prep_batch_sg_dma() to transfer multiple buffers in a single
  DMA transaction. Each scatterlist entry carries a DMA destination
  address alongside the usual source address, so PCI_EPF_MHI now selects
  NEED_SG_DMA_DST_ADDR.
- pci_epf_mhi_iatu_read_batch(): CPU-copy fallback that reads the
  buffers sequentially through the iATU.

This enables the MHI endpoint stack to cache ring data efficiently,
particularly in wraparound scenarios where the ring contents span two
non-contiguous host memory regions; a hypothetical caller for that case
is sketched below.
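
As an illustration only, here is a minimal sketch of such a caller. The
segment addresses, lengths, and local bounce buffer are hypothetical;
only struct mhi_ep_buf_info and the read_batch() callback come from
this series:

  /* Hypothetical caller: read a wrapped ring as one two-entry batch */
  static int cache_wrapped_ring(struct mhi_ep_cntrl *mhi_cntrl,
                                u64 seg0_host, size_t seg0_len,
                                u64 seg1_host, size_t seg1_len,
                                void *local_buf)
  {
          struct mhi_ep_buf_info buf_info[2] = {
                  {
                          /* Tail of the ring, up to the end of the host region */
                          .host_addr = seg0_host,
                          .dev_addr = local_buf,
                          .size = seg0_len,
                  },
                  {
                          /* Wrapped head, restarting at the host region base */
                          .host_addr = seg1_host,
                          .dev_addr = local_buf + seg0_len,
                          .size = seg1_len,
                  },
          };

          /* One call; a single DMA transaction when eDMA is in use */
          return mhi_cntrl->read_batch(mhi_cntrl, buf_info,
                                       ARRAY_SIZE(buf_info));
  }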

Signed-off-by: Sumit Kumar <sumit.kumar@xxxxxxxxxxxxxxxx>
---
drivers/pci/endpoint/functions/Kconfig | 1 +
drivers/pci/endpoint/functions/pci-epf-mhi.c | 120 +++++++++++++++++++++++++++
include/linux/mhi_ep.h | 3 +
3 files changed, 124 insertions(+)

diff --git a/drivers/pci/endpoint/functions/Kconfig b/drivers/pci/endpoint/functions/Kconfig
index 0c9cea0698d7bd3d8bd11aa1db0195978d9406b9..43131b6db8a2ca57b7a4f0eba8affba3a77f9ad7 100644
--- a/drivers/pci/endpoint/functions/Kconfig
+++ b/drivers/pci/endpoint/functions/Kconfig
@@ -41,6 +41,7 @@ config PCI_EPF_VNTB
config PCI_EPF_MHI
tristate "PCI Endpoint driver for MHI bus"
depends on PCI_ENDPOINT && MHI_BUS_EP
+ select NEED_SG_DMA_DST_ADDR
help
Enable this configuration option to enable the PCI Endpoint
driver for Modem Host Interface (MHI) bus in Qualcomm Endpoint
diff --git a/drivers/pci/endpoint/functions/pci-epf-mhi.c b/drivers/pci/endpoint/functions/pci-epf-mhi.c
index 6643a88c7a0ce38161bc6253c09d29f1c36ba394..198201d734cc2c6d09be229464a8efdafc3cd611 100644
--- a/drivers/pci/endpoint/functions/pci-epf-mhi.c
+++ b/drivers/pci/endpoint/functions/pci-epf-mhi.c
@@ -448,6 +448,124 @@ static int pci_epf_mhi_edma_write(struct mhi_ep_cntrl *mhi_cntrl,
return ret;
}

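+/*
+ * CPU-copy fallback: read each buffer synchronously through the iATU,
+ * one at a time; no hardware batching is involved.
+ */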
+static int pci_epf_mhi_iatu_read_batch(struct mhi_ep_cntrl *mhi_cntrl,
+ struct mhi_ep_buf_info *buf_info_array,
+ u32 num_buffers)
+{
+ int ret;
+ u32 i;
+
+ for (i = 0; i < num_buffers; i++) {
+ ret = pci_epf_mhi_iatu_read(mhi_cntrl, &buf_info_array[i]);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
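+/*
+ * Read multiple host buffers in a single eDMA transaction using a
+ * dual-address scatterlist (host source and local destination per entry).
+ */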
+static int pci_epf_mhi_edma_read_batch(struct mhi_ep_cntrl *mhi_cntrl,
+ struct mhi_ep_buf_info *buf_info_array,
+ u32 num_buffers)
+{
+ struct pci_epf_mhi *epf_mhi = to_epf_mhi(mhi_cntrl);
+ struct device *dma_dev = epf_mhi->epf->epc->dev.parent;
+ struct dma_chan *chan = epf_mhi->dma_chan_rx;
+ struct device *dev = &epf_mhi->epf->dev;
+ struct dma_async_tx_descriptor *desc;
+ struct dma_slave_config config = {};
+ DECLARE_COMPLETION_ONSTACK(complete);
+ struct scatterlist *sg;
+ dma_addr_t *dst_addrs;
+ dma_cookie_t cookie;
+ int ret;
+ u32 i;
+
+ if (num_buffers == 0)
+ return -EINVAL;
+
+ mutex_lock(&epf_mhi->lock);
+
+ sg = kcalloc(num_buffers, sizeof(*sg), GFP_KERNEL);
+ if (!sg) {
+ ret = -ENOMEM;
+ goto err_unlock;
+ }
+
+ dst_addrs = kcalloc(num_buffers, sizeof(*dst_addrs), GFP_KERNEL);
+ if (!dst_addrs) {
+ ret = -ENOMEM;
+ goto err_free_sg;
+ }
+
+ sg_init_table(sg, num_buffers);
+
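+ /*
+ * Map each local buffer for DMA and record both sides of the
+ * transfer in the sg entry: the host address as the source and
+ * the mapped device address as the destination.
+ */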
+ for (i = 0; i < num_buffers; i++) {
+ dst_addrs[i] = dma_map_single(dma_dev, buf_info_array[i].dev_addr,
+ buf_info_array[i].size, DMA_FROM_DEVICE);
+ ret = dma_mapping_error(dma_dev, dst_addrs[i]);
+ if (ret) {
+ dev_err(dev, "Failed to map buffer %u\n", i);
+ /* Clear the error cookie so the cleanup loop skips this slot */
+ dst_addrs[i] = 0;
+ goto err_unmap;
+ }
+
+ sg_dma_address(&sg[i]) = buf_info_array[i].host_addr;
+ sg_dma_dst_address(&sg[i]) = dst_addrs[i];
+ sg_dma_len(&sg[i]) = buf_info_array[i].size;
+ }
+
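+ /* Host memory is the DMA source, so the channel runs DEV_TO_MEM */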
+ config.direction = DMA_DEV_TO_MEM;
+ ret = dmaengine_slave_config(chan, &config);
+ if (ret) {
+ dev_err(dev, "Failed to configure DMA channel\n");
+ goto err_unmap;
+ }
+
+ desc = dmaengine_prep_batch_sg_dma(chan, sg, num_buffers,
+ DMA_DEV_TO_MEM,
+ DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
+ if (!desc) {
+ dev_err(dev, "Failed to prepare batch sg DMA\n");
+ ret = -EIO;
+ goto err_unmap;
+ }
+
+ desc->callback = pci_epf_mhi_dma_callback;
+ desc->callback_param = &complete;
+
+ cookie = dmaengine_submit(desc);
+ ret = dma_submit_error(cookie);
+ if (ret) {
+ dev_err(dev, "Failed to submit DMA\n");
+ goto err_unmap;
+ }
+
+ dma_async_issue_pending(chan);
+
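+ /* A single descriptor covers the batch, so one completion signals it all */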
+ ret = wait_for_completion_timeout(&complete, msecs_to_jiffies(1000));
+ if (!ret) {
+ dev_err(dev, "DMA transfer timeout\n");
+ dmaengine_terminate_sync(chan);
+ ret = -ETIMEDOUT;
+ goto err_unmap;
+ }
+
+ ret = 0;
+
+err_unmap:
+ for (i = 0; i < num_buffers; i++) {
+ if (dst_addrs[i])
+ dma_unmap_single(dma_dev, dst_addrs[i],
+ buf_info_array[i].size, DMA_FROM_DEVICE);
+ }
+ kfree(dst_addrs);
+err_free_sg:
+ kfree(sg);
+err_unlock:
+ mutex_unlock(&epf_mhi->lock);
+ return ret;
+}
+
static void pci_epf_mhi_dma_worker(struct work_struct *work)
{
struct pci_epf_mhi *epf_mhi = container_of(work, struct pci_epf_mhi, dma_work);
@@ -803,11 +921,13 @@ static int pci_epf_mhi_link_up(struct pci_epf *epf)
mhi_cntrl->unmap_free = pci_epf_mhi_unmap_free;
mhi_cntrl->read_sync = mhi_cntrl->read_async = pci_epf_mhi_iatu_read;
mhi_cntrl->write_sync = mhi_cntrl->write_async = pci_epf_mhi_iatu_write;
+ mhi_cntrl->read_batch = pci_epf_mhi_iatu_read_batch;
if (info->flags & MHI_EPF_USE_DMA) {
mhi_cntrl->read_sync = pci_epf_mhi_edma_read;
mhi_cntrl->write_sync = pci_epf_mhi_edma_write;
mhi_cntrl->read_async = pci_epf_mhi_edma_read_async;
mhi_cntrl->write_async = pci_epf_mhi_edma_write_async;
+ mhi_cntrl->read_batch = pci_epf_mhi_edma_read_batch;
}

/* Register the MHI EP controller */
diff --git a/include/linux/mhi_ep.h b/include/linux/mhi_ep.h
index 7b40fc8cbe77ab8419d167e89264b69a817b9fb1..15554f966e4be1aea1f3129c5f26253f5087edba 100644
--- a/include/linux/mhi_ep.h
+++ b/include/linux/mhi_ep.h
@@ -107,6 +107,7 @@ struct mhi_ep_buf_info {
* @write_sync: CB function for writing to host memory synchronously
* @read_async: CB function for reading from host memory asynchronously
* @write_async: CB function for writing to host memory asynchronously
+ * @read_batch: CB function for reading from host memory in batches synchronously
* @mhi_state: MHI Endpoint state
* @max_chan: Maximum channels supported by the endpoint controller
* @mru: MRU (Maximum Receive Unit) value of the endpoint controller
@@ -164,6 +165,8 @@ struct mhi_ep_cntrl {
int (*write_sync)(struct mhi_ep_cntrl *mhi_cntrl, struct mhi_ep_buf_info *buf_info);
int (*read_async)(struct mhi_ep_cntrl *mhi_cntrl, struct mhi_ep_buf_info *buf_info);
int (*write_async)(struct mhi_ep_cntrl *mhi_cntrl, struct mhi_ep_buf_info *buf_info);
+ int (*read_batch)(struct mhi_ep_cntrl *mhi_cntrl, struct mhi_ep_buf_info *buf_info_array,
+ u32 num_buffers);

enum mhi_state mhi_state;


--
2.34.1