Re: [PATCH v1] iommu/riscv: Add page request queue and IOPF support

From: Gong Shuai

Date: Wed Jun 17 2026 - 10:52:34 EST


On 6/17/2026 5:01 PM, bingyu.xian wrote:
The RISC-V IOMMU Architecture Specification, Version 1.0, Chapter 3.3
defines a Page-request Queue (PQ) for handling PCIe Page Request
Interface (PRI) messages. The current driver implements only the
Command Queue (CQ) and Fault Queue (FQ); the PQ is left
unimplemented, which is the biggest functional gap compared with the
ARM SMMUv3 driver.

Add PQ and IOPF support:

- Initialize and enable the PQ when the hardware advertises ATS
capability (RISCV_IOMMU_CAPABILITIES_ATS), and add an IOMMU_IOPF
Kconfig dependency.
- Implement the PQ interrupt handler riscv_iommu_priq_process(),
which consumes PQ records from the hardware ring buffer.
- Translate PQ records into the kernel's generic iopf_fault format
and pass them to the IOPF framework via
iommu_report_device_fault().
- Implement the .page_response callback, which builds an ATS.PRGR
(Page Request Group Response) command and sends it through the
command queue to notify the requesting device.



Hi, bingyu

Thanks for your work.

Tomasz already has a complete ATS/PRI/SVA support series on GitHub [1],
which hasn't been posted to the mailing list yet.

[1] https://github.com/tjeznach/linux/commits/riscv_iommu.next


Tested on QEMU 10.0 with '-M virt,iommu-sys=on' and
CONFIG_IOMMU_IOPF=y; the PQ comes up cleanly:

riscv,iommu 3010000.iommu: page request queue enabled
Test patch
==========


It would be better to put the test helper in a separate patch. Mixing it
with the core implementation makes the patch hard to apply directly.

Regards,
Shuai



diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
index b1c2d3e4f5a6..3af3f19de94f 100644
--- a/drivers/iommu/riscv/iommu.c
+++ b/drivers/iommu/riscv/iommu.c
@@ -16,6 +16,7 @@
#include <linux/acpi_rimt.h>
#include <linux/compiler.h>
#include <linux/crash_dump.h>
+#include <linux/debugfs.h>
#include <linux/init.h>
#include <linux/iommu.h>
#include <linux/iopoll.h>
@@ -1608,9 +1609,62 @@ static int riscv_iommu_init_check(struct riscv_iommu_device *iommu)
return 0;
}

+#ifdef CONFIG_DEBUG_FS
+/*
+ * debugfs interface to inject a fake PQ record without real PRI hardware.
+ *
+ * Writing a PCI BDF-encoded device id to the 'inject_pq' file constructs a
+ * synthetic riscv_iommu_pq_record and feeds it directly to
+ * riscv_iommu_handle_pq(), exercising the IOPF report path and, via the
+ * IOPF framework, the .page_response -> ATS.PRGR command path.
+ *
+ * The IRQ-driven priq_process() consumer loop is not exercised here, but it
+ * reuses the same queue infrastructure as the existing fltq_process().
+ */
+static struct dentry *riscv_iommu_debugfs_dir;
+
+/*
+ * debugfs write handler for the 'inject_pq' file.
+ *
+ * @data: the struct riscv_iommu_device registered with the debugfs file.
+ * @val:  device id to place in the DID field of the synthetic record.
+ *
+ * Builds a zeroed page request record carrying @val as device id, a fixed
+ * payload address field of 0xdead0, group index 1 and the L and R payload
+ * flags, then hands it to riscv_iommu_handle_pq().
+ *
+ * Returns 0 once the record has been handed over, -ENODEV if the IOMMU does
+ * not advertise ATS, or -EINVAL if @val does not fit in the DID field.
+ */
+static int riscv_iommu_inject_pq_set(void *data, u64 val)
+{
+	struct riscv_iommu_device *iommu = data;
+	struct riscv_iommu_pq_record fake_req;
+	unsigned int devid;
+
+	if (!(iommu->caps & RISCV_IOMMU_CAPABILITIES_ATS)) {
+		dev_err(iommu->dev, "inject_pq: ATS not supported\n");
+		return -ENODEV;
+	}
+
+	/*
+	 * FIELD_PREP() silently masks a run-time value that is wider than the
+	 * field, so an oversized id would be injected as a different device.
+	 * Refuse it instead of truncating.
+	 */
+	if (val > FIELD_MAX(RISCV_IOMMU_PQ_HDR_DID)) {
+		dev_err(iommu->dev, "inject_pq: invalid devid 0x%llx\n", val);
+		return -EINVAL;
+	}
+	devid = val;
+
+	memset(&fake_req, 0, sizeof(fake_req));
+	fake_req.hdr = FIELD_PREP(RISCV_IOMMU_PQ_HDR_DID, devid);
+	fake_req.payload = FIELD_PREP(RISCV_IOMMU_PQ_PAYLOAD_ADDR, 0xdead0) |
+			   FIELD_PREP(RISCV_IOMMU_PQ_PAYLOAD_PRGI, 1) |
+			   RISCV_IOMMU_PQ_PAYLOAD_L |
+			   RISCV_IOMMU_PQ_PAYLOAD_R;
+
+	dev_info(iommu->dev,
+		 "inject_pq: injecting fake PQ record for devid 0x%x\n", devid);
+	riscv_iommu_handle_pq(iommu, &fake_req);
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(riscv_iommu_inject_pq_fops, NULL,
+ riscv_iommu_inject_pq_set, "%llu\n");
+
+/*
+ * Create the "riscv-iommu" debugfs directory at the debugfs root and a
+ * write-only (0200) "inject_pq" file inside it, bound to @iommu.
+ *
+ * NOTE(review): the directory name and the dentry variable are shared by
+ * every caller, so this looks suited to a single IOMMU instance only;
+ * confirm before relying on it with several instances.
+ */
+static void riscv_iommu_debugfs_init(struct riscv_iommu_device *iommu)
+{
+	struct dentry *dir = debugfs_create_dir("riscv-iommu", NULL);
+
+	riscv_iommu_debugfs_dir = dir;
+	debugfs_create_file("inject_pq", 0200, dir, iommu,
+			    &riscv_iommu_inject_pq_fops);
+}
+
+/*
+ * Remove the debugfs directory together with everything below it and
+ * forget the cached dentry.
+ */
+static void riscv_iommu_debugfs_remove(void)
+{
+	struct dentry *dir = riscv_iommu_debugfs_dir;
+
+	riscv_iommu_debugfs_dir = NULL;
+	debugfs_remove_recursive(dir);
+}
+#else
+static inline void riscv_iommu_debugfs_init(struct riscv_iommu_device *iommu) { }
+static inline void riscv_iommu_debugfs_remove(void) { }
+#endif /* CONFIG_DEBUG_FS */
+
void riscv_iommu_remove(struct riscv_iommu_device *iommu)
{
+ riscv_iommu_debugfs_remove();
iommu_device_unregister(&iommu->iommu);
iommu_device_sysfs_remove(&iommu->iommu);
riscv_iommu_iodir_set_mode(iommu, RISCV_IOMMU_DDTP_IOMMU_MODE_OFF);
@@ -1754,6 +1808,8 @@ int riscv_iommu_init(struct riscv_iommu_device *iommu)
goto err_remove_sysfs;
}

+ riscv_iommu_debugfs_init(iommu);
+
return 0;

err_remove_sysfs:
---

Test logs:
root@Ubuntu-riscv64:~# dmesg | grep "iommu"
[ 0.590293] iommu: Default domain type: Translated
[ 0.590379] iommu: DMA domain TLB invalidation policy: lazy mode
[ 1.374106] riscv,iommu 3010000.iommu: failed to find an MSI domain
[ 1.374429] riscv,iommu 3010000.iommu: using wire-signaled interrupts
[ 1.384786] riscv,iommu 3010000.iommu: page request queue enabled
[ 1.391279] pci 0000:00:00.0: Adding to iommu group 0
[ 1.391798] pci 0000:00:01.0: Adding to iommu group 1
[ 1.392023] pci 0000:00:02.0: Adding to iommu group 2
[ 1.392252] pci 0000:00:03.0: Adding to iommu group 3

root@Ubuntu-riscv64:~# echo 8 > /sys/kernel/debug/riscv-iommu/inject_pq

root@Ubuntu-riscv64:~# dmesg | grep "iommu"
[ 0.590293] iommu: Default domain type: Translated
[ 0.590379] iommu: DMA domain TLB invalidation policy: lazy mode
[ 1.374106] riscv,iommu 3010000.iommu: failed to find an MSI domain
[ 1.374429] riscv,iommu 3010000.iommu: using wire-signaled interrupts
[ 1.384786] riscv,iommu 3010000.iommu: page request queue enabled
[ 1.391279] pci 0000:00:00.0: Adding to iommu group 0
[ 1.391798] pci 0000:00:01.0: Adding to iommu group 1
[ 1.392023] pci 0000:00:02.0: Adding to iommu group 2
[ 1.392252] pci 0000:00:03.0: Adding to iommu group 3
[ 1308.301611] riscv,iommu 3010000.iommu: inject_pq: injecting fake PQ record for devid 0x8
[ 1308.303467] riscv,iommu 3010000.iommu: page request fault report failed: -22

Assisted-by: YuanSheng: deepseek-v4-pro
Co-developed-by: Quan Zhou <zhouquan@xxxxxxxxxxx>
Signed-off-by: Quan Zhou <zhouquan@xxxxxxxxxxx>
Signed-off-by: bingyu.xian <shanbeeyoo@xxxxxxxxx>
---
drivers/iommu/riscv/Kconfig | 1 +
drivers/iommu/riscv/iommu.c | 178 +++++++++++++++++++++++++++++++++++-
drivers/iommu/riscv/iommu.h | 3 +
3 files changed, 181 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/riscv/Kconfig b/drivers/iommu/riscv/Kconfig
index b86e5ab94183..c5abb7b4ba8f 100644
--- a/drivers/iommu/riscv/Kconfig
+++ b/drivers/iommu/riscv/Kconfig
@@ -7,6 +7,7 @@ config RISCV_IOMMU
depends on GENERIC_MSI_IRQ
depends on (RISCV || COMPILE_TEST) && 64BIT
select IOMMU_API
+ select IOMMU_IOPF
select GENERIC_PT
select IOMMU_PT
select IOMMU_PT_RISCV64
diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
index a31f50bbad35..b1c2d3e4f5a6 100644
--- a/drivers/iommu/riscv/iommu.c
+++ b/drivers/iommu/riscv/iommu.c
@@ -33,9 +33,10 @@
#define RISCV_IOMMU_DDTP_TIMEOUT 10000000
#define RISCV_IOMMU_IOTINVAL_TIMEOUT 90000000

-/* Number of entries per CMD/FLT queue, should be <= INT_MAX */
+/* Number of entries per CMD/FLT/PRI queue, should be <= INT_MAX */
#define RISCV_IOMMU_DEF_CQ_COUNT 8192
#define RISCV_IOMMU_DEF_FQ_COUNT 4096
+#define RISCV_IOMMU_DEF_PQ_COUNT 4096

/* RISC-V IOMMU PPN <> PHYS address conversions, PHYS <=> PPN[53:10] */
#define phys_to_ppn(pa) (((pa) >> 2) & (((1ULL << 44) - 1) << 10))
@@ -565,6 +566,151 @@ static irqreturn_t riscv_iommu_fltq_process(int irq, void *data)
return IRQ_HANDLED;
}


+/*
+ * Resolve a device id taken from a page request record to a struct device.
+ *
+ * @iommu: IOMMU instance the record came from (not used by the lookup).
+ * @devid: device id; bits above the low byte select the bus number and the
+ *         low byte is the devfn.
+ *
+ * The bus is looked up in PCI domain 0 only. Returns NULL if either the bus
+ * or the slot is not found. On success the returned device carries the
+ * reference taken by pci_get_slot().
+ */
+static struct device *riscv_iommu_find_dev(struct riscv_iommu_device *iommu,
+					    unsigned int devid)
+{
+	struct pci_bus *bus = pci_find_bus(0, devid >> 8);
+	struct pci_dev *pdev = bus ? pci_get_slot(bus, devid & 0xff) : NULL;
+
+	return pdev ? &pdev->dev : NULL;
+}
+
+/*
+ * Translate one page request queue record into a generic IOPF fault and
+ * report it for the requesting device.
+ *
+ * @iommu: IOMMU instance the record was taken from.
+ * @req:   page request record to translate; it is only read.
+ *
+ * A record whose device id cannot be resolved is dropped with a
+ * rate-limited warning. A failure of iommu_report_device_fault() is only
+ * logged, also rate-limited.
+ */
+static void riscv_iommu_handle_pq(struct riscv_iommu_device *iommu,
+				  struct riscv_iommu_pq_record *req)
+{
+	struct iopf_fault fault;
+	struct device *dev;
+	unsigned int devid;
+	int ret;
+
+	devid = FIELD_GET(RISCV_IOMMU_PQ_HDR_DID, req->hdr);
+	dev = riscv_iommu_find_dev(iommu, devid);
+	if (!dev) {
+		dev_warn_ratelimited(iommu->dev,
+				     "page request for unknown devid 0x%x\n", devid);
+		return;
+	}
+
+	memset(&fault, 0, sizeof(fault));
+	fault.fault.type = IOMMU_FAULT_PAGE_REQ;
+	/* The payload address field is shifted left by 12 to form the address */
+	fault.fault.prm.addr = FIELD_GET(RISCV_IOMMU_PQ_PAYLOAD_ADDR, req->payload) << 12;
+	fault.fault.prm.grpid = FIELD_GET(RISCV_IOMMU_PQ_PAYLOAD_PRGI, req->payload);
+
+	/* Requested access permissions */
+	if (req->payload & RISCV_IOMMU_PQ_PAYLOAD_R)
+		fault.fault.prm.perm |= IOMMU_FAULT_PERM_READ;
+	if (req->payload & RISCV_IOMMU_PQ_PAYLOAD_W)
+		fault.fault.prm.perm |= IOMMU_FAULT_PERM_WRITE;
+	if (req->hdr & RISCV_IOMMU_PQ_HDR_EXEC)
+		fault.fault.prm.perm |= IOMMU_FAULT_PERM_EXEC;
+	if (req->hdr & RISCV_IOMMU_PQ_HDR_PRIV)
+		fault.fault.prm.perm |= IOMMU_FAULT_PERM_PRIV;
+
+	/* The PASID is only meaningful when the record flags it as valid */
+	if (req->hdr & RISCV_IOMMU_PQ_HDR_PV) {
+		fault.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID |
+					 IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
+		fault.fault.prm.pasid = FIELD_GET(RISCV_IOMMU_PQ_HDR_PID, req->hdr);
+	}
+
+	if (req->payload & RISCV_IOMMU_PQ_PAYLOAD_L)
+		fault.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
+
+	/* Store DID in private_data for page_response to send ATS.PRGR */
+	fault.fault.prm.private_data[0] = devid;
+
+	ret = iommu_report_device_fault(dev, &fault);
+	if (ret) {
+		dev_warn_ratelimited(iommu->dev,
+				     "page request fault report failed: %d\n", ret);
+	}
+
+	/*
+	 * riscv_iommu_find_dev() resolves the device with pci_get_slot(),
+	 * which takes a reference on the pci_dev. Drop it now that the fault
+	 * has been handed over, otherwise every page request leaks a
+	 * reference on the device.
+	 */
+	pci_dev_put(to_pci_dev(dev));
+}
+
+/*
+ * Threaded interrupt handler for the page request queue.
+ *
+ * @irq:  interrupt number (unused).
+ * @data: the struct riscv_iommu_queue describing the page request queue.
+ *
+ * Acknowledges the queue's pending bit, hands every available record to
+ * riscv_iommu_handle_pq(), then clears and reports any memory-fault or
+ * overflow condition latched in the queue control register.
+ * Always returns IRQ_HANDLED.
+ */
+static irqreturn_t riscv_iommu_priq_process(int irq, void *data)
+{
+	struct riscv_iommu_queue *queue = data;
+	struct riscv_iommu_device *iommu = queue->iommu;
+	struct riscv_iommu_pq_record *ring;
+	unsigned int csr, head;
+	int nr, i;
+
+	ring = (struct riscv_iommu_pq_record *)queue->base;
+
+	/* Acknowledge the pending interrupt before draining the ring. */
+	riscv_iommu_writel(iommu, RISCV_IOMMU_REG_IPSR, Q_IPSR(queue));
+
+	/* Keep consuming batches until a pass yields no records. */
+	for (;;) {
+		nr = riscv_iommu_queue_consume(queue, &head);
+		for (i = 0; i < nr; i++, head++)
+			riscv_iommu_handle_pq(iommu, &ring[Q_ITEM(queue, head)]);
+		riscv_iommu_queue_release(queue, nr);
+		if (nr <= 0)
+			break;
+	}
+
+	/* Writing the value read back clears the MF/OF error bits. */
+	csr = riscv_iommu_readl(iommu, queue->qcr);
+	if (csr & (RISCV_IOMMU_PQCSR_PQMF | RISCV_IOMMU_PQCSR_PQOF)) {
+		riscv_iommu_writel(iommu, queue->qcr, csr);
+		dev_warn(iommu->dev,
+			 "Queue #%u error; memory fault:%d overflow:%d\n",
+			 queue->qid,
+			 !!(csr & RISCV_IOMMU_PQCSR_PQMF),
+			 !!(csr & RISCV_IOMMU_PQCSR_PQOF));
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * .page_response callback: answer a page request group by queueing an
+ * ATS.PRGR command on the command queue.
+ *
+ * @dev: device the response is for; used to find its IOMMU.
+ * @evt: the fault being answered; supplies the device id saved in
+ *       private_data[0] and, when flagged valid, the PASID.
+ * @msg: response to deliver (response code and page request group id).
+ *
+ * IOMMU_PAGE_RESP_SUCCESS maps to response code 0, IOMMU_PAGE_RESP_INVALID
+ * to 1, and every other code to 0xF (Response Failure).
+ */
+static void riscv_iommu_page_response(struct device *dev,
+				      struct iopf_fault *evt,
+				      struct iommu_page_response *msg)
+{
+	struct riscv_iommu_device *iommu = dev_to_iommu(dev);
+	/* Device id stashed in private_data[0] when the fault was reported */
+	unsigned int rid = evt->fault.prm.private_data[0];
+	struct riscv_iommu_command prgr;
+	u8 status;
+
+	if (msg->code == IOMMU_PAGE_RESP_SUCCESS)
+		status = 0;	/* Success */
+	else if (msg->code == IOMMU_PAGE_RESP_INVALID)
+		status = 1;	/* Invalid Request */
+	else
+		status = 0xF;	/* Response Failure */
+
+	/* First word: opcode, function and requester id */
+	prgr.dword0 = FIELD_PREP(RISCV_IOMMU_CMD_OPCODE, RISCV_IOMMU_CMD_ATS_OPCODE) |
+		      FIELD_PREP(RISCV_IOMMU_CMD_FUNC, RISCV_IOMMU_CMD_ATS_FUNC_PRGR) |
+		      FIELD_PREP(RISCV_IOMMU_CMD_ATS_RID, rid);
+
+	/* The PASID is only included when the request carried a valid one */
+	if (evt->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID)
+		prgr.dword0 |= RISCV_IOMMU_CMD_ATS_PV |
+			       FIELD_PREP(RISCV_IOMMU_CMD_ATS_PID, evt->fault.prm.pasid);
+
+	/* Second word: group index, response code and destination id */
+	prgr.dword1 = FIELD_PREP(RISCV_IOMMU_CMD_ATS_PRGR_PRG_INDEX, msg->grpid) |
+		      FIELD_PREP(RISCV_IOMMU_CMD_ATS_PRGR_RESP_CODE, status) |
+		      FIELD_PREP(RISCV_IOMMU_CMD_ATS_PRGR_DST_ID, rid);
+
+	riscv_iommu_cmd_send(iommu, &prgr);
+}
+
/* Lookup and initialize device context info structure. */
static struct riscv_iommu_dc *riscv_iommu_get_dc(struct riscv_iommu_device *iommu,
unsigned int devid)
@@ -1404,6 +1550,7 @@ static const struct iommu_ops riscv_iommu_ops = {
.device_group = riscv_iommu_device_group,
.probe_device = riscv_iommu_probe_device,
.release_device = riscv_iommu_release_device,
+ .page_response = riscv_iommu_page_response,
};

static int riscv_iommu_init_check(struct riscv_iommu_device *iommu)
@@ -1466,6 +1613,8 @@ void riscv_iommu_remove(struct riscv_iommu_device *iommu)
riscv_iommu_iodir_set_mode(iommu, RISCV_IOMMU_DDTP_IOMMU_MODE_OFF);
riscv_iommu_queue_disable(&iommu->cmdq);
riscv_iommu_queue_disable(&iommu->fltq);
+ if (iommu->caps & RISCV_IOMMU_CAPABILITIES_ATS)
+ riscv_iommu_queue_disable(&iommu->priq);
}

int riscv_iommu_init(struct riscv_iommu_device *iommu)
@@ -1494,6 +1643,15 @@ int riscv_iommu_init(struct riscv_iommu_device *iommu)
if (rc)
return rc;

+ /* Allocate page request queue if ATS is supported */
+ if (iommu->caps & RISCV_IOMMU_CAPABILITIES_ATS) {
+ RISCV_IOMMU_QUEUE_INIT(&iommu->priq, PQ);
+ rc = riscv_iommu_queue_alloc(iommu, &iommu->priq,
+ sizeof(struct riscv_iommu_pq_record));
+ if (rc)
+ return rc;
+ }
+
rc = riscv_iommu_queue_enable(iommu, &iommu->cmdq, riscv_iommu_cmdq_process);
if (rc)
return rc;
@@ -1502,6 +1660,15 @@ int riscv_iommu_init(struct riscv_iommu_device *iommu)
if (rc)
goto err_queue_disable;

+ /* Enable page request queue if ATS is supported */
+ if (iommu->caps & RISCV_IOMMU_CAPABILITIES_ATS) {
+ rc = riscv_iommu_queue_enable(iommu, &iommu->priq,
+ riscv_iommu_priq_process);
+ if (rc)
+ goto err_queue_disable;
+ dev_info(iommu->dev, "page request queue enabled\n");
+ }
+
rc = riscv_iommu_iodir_set_mode(iommu, RISCV_IOMMU_DDTP_IOMMU_MODE_MAX);
if (rc)
goto err_queue_disable;
@@ -1534,6 +1701,8 @@ int riscv_iommu_init(struct riscv_iommu_device *iommu)
err_iodir_off:
riscv_iommu_iodir_set_mode(iommu, RISCV_IOMMU_DDTP_IOMMU_MODE_OFF);
err_queue_disable:
+ if (iommu->caps & RISCV_IOMMU_CAPABILITIES_ATS)
+ riscv_iommu_queue_disable(&iommu->priq);
riscv_iommu_queue_disable(&iommu->fltq);
riscv_iommu_queue_disable(&iommu->cmdq);
return rc;
diff --git a/drivers/iommu/riscv/iommu.h b/drivers/iommu/riscv/iommu.h
index 46df79dd5495..5c5ab24539f2 100644
--- a/drivers/iommu/riscv/iommu.h
+++ b/drivers/iommu/riscv/iommu.h
@@ -14,6 +14,8 @@
#include <linux/iommu.h>
#include <linux/types.h>
#include <linux/iopoll.h>
+#include <linux/pci.h>
+#include <linux/pci-ats.h>

#include "iommu-bits.h"

@@ -55,6 +57,7 @@ struct riscv_iommu_device {
/* hardware queues */
struct riscv_iommu_queue cmdq;
struct riscv_iommu_queue fltq;
+ struct riscv_iommu_queue priq;

/* device directory */
unsigned int ddt_mode;
--
2.53.0


_______________________________________________
linux-riscv mailing list
linux-riscv@xxxxxxxxxxxxxxxxxxx
http://lists.infradead.org/mailman/listinfo/linux-riscv