Re: [PATCH v5 5/9] iommufd: Add an ioctl to query PA from IOVA for noiommu mode
From: Yi Liu
Date: Wed May 20 2026 - 03:15:45 EST
On 5/12/26 02:41, Jacob Pan wrote:
To support no-IOMMU mode where userspace drivers perform unsafe DMA
using physical addresses, introduce a new API to retrieve the
physical address of a user-allocated DMA buffer that has been mapped to
an IOVA via IOAS. The mapping is backed by SW-only I/O page tables
maintained by the generic IOMMUPT framework.
Suggested-by: Jason Gunthorpe <jgg@xxxxxxxxxx>
Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxx>
Since this patch is under your authorship, maybe Jason's s-o-b should be preceded by a Co-developed-by (c-d-b) tag.
Signed-off-by: Jacob Pan <jacob.pan@xxxxxxxxxxxxxxxxxxx>
---
v5:
- Add header stubs for iopt_get_phys() and
iommufd_ioas_noiommu_get_pa() to avoid ifdef at call sites (Kevin)
v4:
- Fix ioctl return type (Yi Liu)
v2:
- New patch
---
drivers/iommu/iommufd/io_pagetable.c | 62 +++++++++++++++++++++++++
drivers/iommu/iommufd/ioas.c | 30 ++++++++++++
drivers/iommu/iommufd/iommufd_private.h | 18 +++++++
drivers/iommu/iommufd/main.c | 3 ++
include/uapi/linux/iommufd.h | 25 ++++++++++
5 files changed, 138 insertions(+)
diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c
index 24d4917105d9..1ee7c8e6408c 100644
--- a/drivers/iommu/iommufd/io_pagetable.c
+++ b/drivers/iommu/iommufd/io_pagetable.c
@@ -859,6 +859,68 @@ int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
return iopt_unmap_iova_range(iopt, iova, iova_last, unmapped);
}
+#ifdef CONFIG_IOMMUFD_NOIOMMU
+int iopt_get_phys(struct io_pagetable *iopt, unsigned long iova, u64 *paddr,
+ u64 *length)
+{
+ struct iopt_area *area;
+ u64 tmp_length = 0;
+ u64 tmp_paddr = 0;
+ int rc = 0;
+
+ down_read(&iopt->iova_rwsem);
+ area = iopt_area_iter_first(iopt, iova, iova);
+ if (!area || !area->pages) {
+ rc = -ENOENT;
+ goto unlock_exit;
+ }
+
+ if (!area->storage_domain ||
+ area->storage_domain->owner != &iommufd_noiommu_ops) {
+ rc = -EOPNOTSUPP;
+ goto unlock_exit;
+ }
+
+ *paddr = iommu_iova_to_phys(area->storage_domain, iova);
+ if (!*paddr) {
+ rc = -EINVAL;
+ goto unlock_exit;
+ }
+
+ tmp_length = PAGE_SIZE - offset_in_page(iova);
+ tmp_paddr = *paddr;
+ /*
+ * Scan the domain for the contiguous physical address length so that
+ * userspace search can be optimized for fewer ioctls.
+ */
+ while (iova < iopt_area_last_iova(area)) {
+ unsigned long next_iova;
+ u64 next_paddr;
+
+ if (check_add_overflow(iova, PAGE_SIZE, &next_iova))
+ break;
+
+ if (next_iova > iopt_area_last_iova(area))
+ break;
+
+ next_paddr = iommu_iova_to_phys(area->storage_domain, next_iova);
+
+ if (!next_paddr || next_paddr != tmp_paddr + PAGE_SIZE)
+ break;
+
+ iova = next_iova;
+ tmp_paddr += PAGE_SIZE;
+ tmp_length += PAGE_SIZE;
+ }
+ *length = tmp_length;
+
+unlock_exit:
+ up_read(&iopt->iova_rwsem);
+
+ return rc;
+}
+#endif
+
int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped)
{
/* If the IOVAs are empty then unmap all succeeds */
diff --git a/drivers/iommu/iommufd/ioas.c b/drivers/iommu/iommufd/ioas.c
index fed06c2b728e..666440e32c9e 100644
--- a/drivers/iommu/iommufd/ioas.c
+++ b/drivers/iommu/iommufd/ioas.c
@@ -375,6 +375,36 @@ int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd)
return rc;
}
+#ifdef CONFIG_IOMMUFD_NOIOMMU
+int iommufd_ioas_noiommu_get_pa(struct iommufd_ucmd *ucmd)
+{
+ struct iommu_ioas_noiommu_get_pa *cmd = ucmd->cmd;
+ struct iommufd_ioas *ioas;
+ int rc;
+
+ if (!capable(CAP_SYS_RAWIO))
+ return -EPERM;
+
+ if (cmd->flags || cmd->__reserved)
+ return -EOPNOTSUPP;
+
+ ioas = iommufd_get_ioas(ucmd->ictx, cmd->ioas_id);
+ if (IS_ERR(ioas))
+ return PTR_ERR(ioas);
+
+ rc = iopt_get_phys(&ioas->iopt, cmd->iova, &cmd->out_phys,
+ &cmd->out_length);
+ if (rc)
+ goto out_put;
+
+ rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+out_put:
+ iommufd_put_object(ucmd->ictx, &ioas->obj);
+
+ return rc;
+}
+#endif
+
static void iommufd_release_all_iova_rwsem(struct iommufd_ctx *ictx,
struct xarray *ioas_list)
{
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 2682b5baa6e9..13f1506d8066 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -118,6 +118,16 @@ int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list,
int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
unsigned long length, unsigned long *unmapped);
int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped);
+#ifdef CONFIG_IOMMUFD_NOIOMMU
+int iopt_get_phys(struct io_pagetable *iopt, unsigned long iova, u64 *paddr,
+ u64 *length);
+#else
+static inline int iopt_get_phys(struct io_pagetable *iopt, unsigned long iova,
+ u64 *paddr, u64 *length)
+{
+ return -EOPNOTSUPP;
+}
+#endif
int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt,
struct iommu_domain *domain,
@@ -346,6 +356,14 @@ int iommufd_ioas_map_file(struct iommufd_ucmd *ucmd);
int iommufd_ioas_change_process(struct iommufd_ucmd *ucmd);
int iommufd_ioas_copy(struct iommufd_ucmd *ucmd);
int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd);
+#ifdef CONFIG_IOMMUFD_NOIOMMU
+int iommufd_ioas_noiommu_get_pa(struct iommufd_ucmd *ucmd);
+#else
+static inline int iommufd_ioas_noiommu_get_pa(struct iommufd_ucmd *ucmd)
+{
+ return -EOPNOTSUPP;
+}
+#endif
int iommufd_ioas_option(struct iommufd_ucmd *ucmd);
int iommufd_option_rlimit_mode(struct iommu_option *cmd,
struct iommufd_ctx *ictx);
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index 8c6d43601afb..3b4192d70570 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -424,6 +424,7 @@ union ucmd_buffer {
struct iommu_ioas_alloc alloc;
struct iommu_ioas_allow_iovas allow_iovas;
struct iommu_ioas_copy ioas_copy;
+ struct iommu_ioas_noiommu_get_pa noiommu_get_pa;
struct iommu_ioas_iova_ranges iova_ranges;
struct iommu_ioas_map map;
struct iommu_ioas_unmap unmap;
@@ -482,6 +483,8 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
IOCTL_OP(IOMMU_IOAS_MAP, iommufd_ioas_map, struct iommu_ioas_map, iova),
IOCTL_OP(IOMMU_IOAS_MAP_FILE, iommufd_ioas_map_file,
struct iommu_ioas_map_file, iova),
+ IOCTL_OP(IOMMU_IOAS_NOIOMMU_GET_PA, iommufd_ioas_noiommu_get_pa, struct iommu_ioas_noiommu_get_pa,
+ out_phys),
IOCTL_OP(IOMMU_IOAS_UNMAP, iommufd_ioas_unmap, struct iommu_ioas_unmap,
length),
IOCTL_OP(IOMMU_OPTION, iommufd_option, struct iommu_option, val64),
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index e998dfbd6960..7df366d161f1 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -57,6 +57,7 @@ enum {
IOMMUFD_CMD_IOAS_CHANGE_PROCESS = 0x92,
IOMMUFD_CMD_VEVENTQ_ALLOC = 0x93,
IOMMUFD_CMD_HW_QUEUE_ALLOC = 0x94,
+ IOMMUFD_CMD_IOAS_NOIOMMU_GET_PA = 0x95,
};
/**
@@ -219,6 +220,30 @@ struct iommu_ioas_map {
};
#define IOMMU_IOAS_MAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP)
+/**
+ * struct iommu_ioas_noiommu_get_pa - ioctl(IOMMU_IOAS_NOIOMMU_GET_PA)
+ * @size: sizeof(struct iommu_ioas_noiommu_get_pa)
+ * @flags: Reserved, must be 0 for now
+ * @ioas_id: IOAS ID to query IOVA to PA mapping from
+ * @__reserved: Must be 0
+ * @iova: IOVA to query
+ * @out_length: Number of bytes contiguous physical address starting from phys
+ * @out_phys: Output physical address the IOVA maps to
+ *
+ * Query the physical address backing an IOVA range. The entire range must be
+ * mapped already. For noiommu devices doing unsafe DMA only.
+ */
+struct iommu_ioas_noiommu_get_pa {
+ __u32 size;
+ __u32 flags;
+ __u32 ioas_id;
+ __u32 __reserved;
+ __aligned_u64 iova;
+ __aligned_u64 out_length;
+ __aligned_u64 out_phys;
+};
+#define IOMMU_IOAS_NOIOMMU_GET_PA _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_NOIOMMU_GET_PA)
Can this work for normal vfio usage as well? E.g. vfio usages that open
devices that have iommu protection.
My concern is that the kernel does not prevent normal vfio usage from invoking
this ioctl. If it works for normal vfio usage as well, we may just remove
the noiommu term from the name. If not, may be
+
/**
* struct iommu_ioas_map_file - ioctl(IOMMU_IOAS_MAP_FILE)
* @size: sizeof(struct iommu_ioas_map_file)