[PATCH v9 4/6] iommufd: Add an ioctl to query PA from IOVA for noiommu mode

From: Jacob Pan

Date: Thu Jun 11 2026 - 13:40:41 EST


To support no-IOMMU mode where userspace drivers perform unsafe DMA
using physical addresses, introduce a new API to retrieve the
physical address of a user-allocated DMA buffer that has been mapped to
an IOVA via IOMMU_IOAS_MAP. The mapping is backed by SW-only I/O page
tables maintained by the GENERIC_PT framework.

Reviewed-by: Lu Baolu <baolu.lu@xxxxxxxxxxxxxxx>
Reviewed-by: Kevin Tian <kevin.tian@xxxxxxxxx>
Suggested-by: Jason Gunthorpe <jgg@xxxxxxxxxx>
Co-developed-by: Jason Gunthorpe <jgg@xxxxxxxxxx>
Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxx>
Signed-off-by: Jacob Pan <jacob.pan@xxxxxxxxxxxxxxxxxxx>
---
v9:
- Make no-IOMMU GET_PA length a real upper bound and reject zero length,
avoiding an unbounded scan while holding IOAS locks. This matches the
bounded-range semantics expected by the incoming
iommu_iova_to_phys_length() helper.
v8:
- Fix comment on start IOVA range (Kevin)
v7:
- Fix commit message (Yi)
- Avoid duplicated tmp_length setting (Yi)
- Handle race with dma-buf revoke pages (Sashiko)
v6:
- Limit search length (Baolu, Jason)
v5:
- Fix next_iova exceeding iopt_area_last_iova (Alex)
- Rename the ioctl to be more specific to NOIOMMU, i.e.
IOMMUFD_CMD_IOAS_NOIOMMU_GET_PA (Kevin)
- Add header stubs for iopt_get_phys()
v4:
- Fix ioctl return type (Yi Liu)
---
drivers/iommu/iommufd/io_pagetable.c | 78 +++++++++++++++++++++++++
drivers/iommu/iommufd/ioas.c | 36 ++++++++++++
drivers/iommu/iommufd/iommufd_private.h | 18 ++++++
drivers/iommu/iommufd/main.c | 3 +
include/uapi/linux/iommufd.h | 28 +++++++++
5 files changed, 163 insertions(+)

diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c
index 24d4917105d9..6a8573dc206a 100644
--- a/drivers/iommu/iommufd/io_pagetable.c
+++ b/drivers/iommu/iommufd/io_pagetable.c
@@ -859,6 +859,84 @@ int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
return iopt_unmap_iova_range(iopt, iova, iova_last, unmapped);
}

+#ifdef CONFIG_IOMMUFD_NOIOMMU
+/**
+ * iopt_get_phys() - Translate an IOVA to a physical address for noiommu use
+ * @iopt: io_pagetable holding the mapping
+ * @iova: IOVA to translate
+ * @paddr: On success, the iommu_iova_to_phys() result for @iova
+ * @length: On input, the non-zero upper bound on the number of bytes to
+ *          report. On success, the number of physically contiguous bytes
+ *          starting at @iova, limited to the input value and to the end of
+ *          the area that contains @iova.
+ *
+ * The lookup runs with iopt->iova_rwsem held for read and the mutex of the
+ * area's iopt_pages held, and only succeeds when the area's storage domain
+ * is owned by iommufd_noiommu_ops.
+ *
+ * Return: 0 on success, -EINVAL if *@length is zero or iopt_dmabuf_revoked()
+ * reports the pages as revoked, -ENOENT if no area with pages covers @iova,
+ * -EOPNOTSUPP if the area has no storage domain or the storage domain is not
+ * owned by iommufd_noiommu_ops.
+ */
+int iopt_get_phys(struct io_pagetable *iopt, unsigned long iova, u64 *paddr,
+		  u64 *length)
+{
+	struct iopt_area *area;
+	struct iopt_pages *pages;
+	unsigned long last_iova;
+	u64 max_length = *length;
+	u64 area_left;
+	u64 tmp_length;
+	u64 tmp_paddr;
+	int rc = 0;
+
+	if (!max_length)
+		return -EINVAL;
+
+	down_read(&iopt->iova_rwsem);
+	area = iopt_area_iter_first(iopt, iova, iova);
+	if (!area || !area->pages) {
+		rc = -ENOENT;
+		goto unlock_exit;
+	}
+
+	pages = area->pages;
+	mutex_lock(&pages->mutex);
+	if (iopt_dmabuf_revoked(pages)) {
+		rc = -EINVAL;
+		goto unlock_pages;
+	}
+
+	if (!area->storage_domain ||
+	    area->storage_domain->owner != &iommufd_noiommu_ops) {
+		rc = -EOPNOTSUPP;
+		goto unlock_pages;
+	}
+
+	last_iova = iopt_area_last_iova(area);
+	/* Bytes in the area that follow @iova, not counting @iova itself */
+	area_left = last_iova - iova;
+
+	*paddr = iommu_iova_to_phys(area->storage_domain, iova);
+	/*
+	 * First chunk: up to the end of the page holding @iova or the end of
+	 * the area, whichever comes first. Taking the minimum before adding
+	 * one keeps the arithmetic from wrapping when the area reaches the
+	 * top of the address space.
+	 */
+	tmp_length = min_t(u64, PAGE_SIZE - offset_in_page(iova) - 1,
+			   area_left) + 1;
+	tmp_paddr = *paddr;
+	/*
+	 * Return the physically contiguous length, capped by the caller
+	 * supplied range length.
+	 */
+	while (iova < last_iova) {
+		unsigned long next_iova;
+		u64 next_paddr;
+
+		if (tmp_length >= max_length)
+			break;
+
+		if (check_add_overflow(iova, PAGE_SIZE, &next_iova))
+			break;
+
+		if (next_iova > last_iova)
+			break;
+
+		next_paddr = iommu_iova_to_phys(area->storage_domain, next_iova);
+
+		/* Stop at the first hole or physical discontinuity */
+		if (!next_paddr || next_paddr != tmp_paddr + PAGE_SIZE)
+			break;
+
+		iova = next_iova;
+		tmp_paddr += PAGE_SIZE;
+		tmp_length += PAGE_SIZE;
+	}
+
+	/*
+	 * Each step above credits a full PAGE_SIZE, which overshoots the area
+	 * when its last IOVA is not at the end of a page. Never report bytes
+	 * that lie outside the area.
+	 */
+	if (tmp_length - 1 > area_left)
+		tmp_length = area_left + 1;
+	if (tmp_length > max_length)
+		tmp_length = max_length;
+	*length = tmp_length;
+
+unlock_pages:
+	mutex_unlock(&pages->mutex);
+unlock_exit:
+	up_read(&iopt->iova_rwsem);
+
+	return rc;
+}
+#endif
+
int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped)
{
/* If the IOVAs are empty then unmap all succeeds */
diff --git a/drivers/iommu/iommufd/ioas.c b/drivers/iommu/iommufd/ioas.c
index fed06c2b728e..1a18cee7456b 100644
--- a/drivers/iommu/iommufd/ioas.c
+++ b/drivers/iommu/iommufd/ioas.c
@@ -375,6 +375,42 @@ int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd)
return rc;
}

+#ifdef CONFIG_IOMMUFD_NOIOMMU
+/**
+ * iommufd_ioas_noiommu_get_pa() - Handler for IOMMU_IOAS_NOIOMMU_GET_PA
+ * @ucmd: ioctl command; ucmd->cmd is a struct iommu_ioas_noiommu_get_pa
+ *
+ * Validates the request, resolves the IOAS named by ioas_id and asks
+ * iopt_get_phys() to fill out_phys and length, then hands the structure
+ * back through iommufd_ucmd_respond().
+ *
+ * Return: 0 on success, -EPERM without CAP_SYS_RAWIO, -EOPNOTSUPP if flags
+ * or __reserved is set, -EINVAL for a zero length, -EOVERFLOW if iova is
+ * ULONG_MAX or above, otherwise the error from iommufd_get_ioas(),
+ * iopt_get_phys() or iommufd_ucmd_respond().
+ */
+int iommufd_ioas_noiommu_get_pa(struct iommufd_ucmd *ucmd)
+{
+	struct iommu_ioas_noiommu_get_pa *cmd = ucmd->cmd;
+	struct iommufd_ioas *ioas;
+	int ret;
+
+	/* Argument checks, in the order their error codes take precedence */
+	if (!capable(CAP_SYS_RAWIO))
+		return -EPERM;
+	if (cmd->flags || cmd->__reserved)
+		return -EOPNOTSUPP;
+	if (!cmd->length)
+		return -EINVAL;
+	if (cmd->iova >= ULONG_MAX)
+		return -EOVERFLOW;
+
+	ioas = iommufd_get_ioas(ucmd->ictx, cmd->ioas_id);
+	if (IS_ERR(ioas))
+		return PTR_ERR(ioas);
+
+	ret = iopt_get_phys(&ioas->iopt, cmd->iova, &cmd->out_phys,
+			    &cmd->length);
+	/* Only respond when the lookup produced a result */
+	if (!ret)
+		ret = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+
+	/* Drop the IOAS reference on success and failure alike */
+	iommufd_put_object(ucmd->ictx, &ioas->obj);
+
+	return ret;
+}
+#endif
+
static void iommufd_release_all_iova_rwsem(struct iommufd_ctx *ictx,
struct xarray *ioas_list)
{
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index c8ed612e896a..15909ba75c18 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -118,6 +118,16 @@ int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list,
int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
unsigned long length, unsigned long *unmapped);
int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped);
+#ifdef CONFIG_IOMMUFD_NOIOMMU
+/* IOVA to physical address lookup, implemented in io_pagetable.c */
+int iopt_get_phys(struct io_pagetable *iopt, unsigned long iova, u64 *paddr,
+ u64 *length);
+#else
+/* Stub for builds without CONFIG_IOMMUFD_NOIOMMU: always -EOPNOTSUPP */
+static inline int iopt_get_phys(struct io_pagetable *iopt, unsigned long iova,
+ u64 *paddr, u64 *length)
+{
+ return -EOPNOTSUPP;
+}
+#endif

int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt,
struct iommu_domain *domain,
@@ -346,6 +356,14 @@ int iommufd_ioas_map_file(struct iommufd_ucmd *ucmd);
int iommufd_ioas_change_process(struct iommufd_ucmd *ucmd);
int iommufd_ioas_copy(struct iommufd_ucmd *ucmd);
int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd);
+#ifdef CONFIG_IOMMUFD_NOIOMMU
+/* IOMMU_IOAS_NOIOMMU_GET_PA ioctl handler, implemented in ioas.c */
+int iommufd_ioas_noiommu_get_pa(struct iommufd_ucmd *ucmd);
+#else
+/* Stub for builds without CONFIG_IOMMUFD_NOIOMMU: always -EOPNOTSUPP */
+static inline int iommufd_ioas_noiommu_get_pa(struct iommufd_ucmd *ucmd)
+{
+ return -EOPNOTSUPP;
+}
+#endif
int iommufd_ioas_option(struct iommufd_ucmd *ucmd);
int iommufd_option_rlimit_mode(struct iommu_option *cmd,
struct iommufd_ctx *ictx);
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index f6ae60bd3f70..a4668995269c 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -424,6 +424,7 @@ union ucmd_buffer {
struct iommu_ioas_alloc alloc;
struct iommu_ioas_allow_iovas allow_iovas;
struct iommu_ioas_copy ioas_copy;
+ struct iommu_ioas_noiommu_get_pa noiommu_get_pa;
struct iommu_ioas_iova_ranges iova_ranges;
struct iommu_ioas_map map;
struct iommu_ioas_unmap unmap;
@@ -482,6 +483,8 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
IOCTL_OP(IOMMU_IOAS_MAP, iommufd_ioas_map, struct iommu_ioas_map, iova),
IOCTL_OP(IOMMU_IOAS_MAP_FILE, iommufd_ioas_map_file,
struct iommu_ioas_map_file, iova),
+ IOCTL_OP(IOMMU_IOAS_NOIOMMU_GET_PA, iommufd_ioas_noiommu_get_pa, struct iommu_ioas_noiommu_get_pa,
+ out_phys),
IOCTL_OP(IOMMU_IOAS_UNMAP, iommufd_ioas_unmap, struct iommu_ioas_unmap,
length),
IOCTL_OP(IOMMU_OPTION, iommufd_option, struct iommu_option, val64),
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index e998dfbd6960..1cd3395620e9 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -57,6 +57,7 @@ enum {
IOMMUFD_CMD_IOAS_CHANGE_PROCESS = 0x92,
IOMMUFD_CMD_VEVENTQ_ALLOC = 0x93,
IOMMUFD_CMD_HW_QUEUE_ALLOC = 0x94,
+ IOMMUFD_CMD_IOAS_NOIOMMU_GET_PA = 0x95,
};

/**
@@ -219,6 +220,33 @@ struct iommu_ioas_map {
};
#define IOMMU_IOAS_MAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP)

+/**
+ * struct iommu_ioas_noiommu_get_pa - ioctl(IOMMU_IOAS_NOIOMMU_GET_PA)
+ * @size: sizeof(struct iommu_ioas_noiommu_get_pa)
+ * @flags: Reserved, must be 0 for now
+ * @ioas_id: IOAS ID to query IOVA to PA mapping from
+ * @__reserved: Must be 0
+ * @iova: IOVA to query
+ * @length: On input, non-zero maximum number of bytes to query starting from
+ *          @iova. On output, number of physically contiguous bytes starting
+ *          from @out_phys, capped by the input length.
+ * @out_phys: Output physical address the IOVA maps to
+ *
+ * Query the physical address backing an IOVA range. The beginning of the
+ * range must be mapped already and length must be non-zero. For noiommu
+ * devices doing unsafe DMA only.
+ *
+ * The caller must have CAP_SYS_RAWIO, otherwise the ioctl fails with EPERM.
+ * It fails with EOPNOTSUPP if @flags or @__reserved is non-zero, EINVAL if
+ * @length is zero, EOVERFLOW if @iova is ULONG_MAX or above, and ENOENT if
+ * no mapped area covers @iova.
+ */
+struct iommu_ioas_noiommu_get_pa {
+ __u32 size;
+ __u32 flags;
+ __u32 ioas_id;
+ __u32 __reserved;
+ __aligned_u64 iova;
+ __aligned_u64 length;
+ __aligned_u64 out_phys;
+};
+#define IOMMU_IOAS_NOIOMMU_GET_PA _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_NOIOMMU_GET_PA)
+
/**
* struct iommu_ioas_map_file - ioctl(IOMMU_IOAS_MAP_FILE)
* @size: sizeof(struct iommu_ioas_map_file)
--
2.43.0