[PATCH v3 09/22] iommu/amd: Introduce and map vIOMMU private IPA region

From: Suravee Suthikulpanit

Date: Mon Jun 29 2026 - 11:48:47 EST


AMD vIOMMU introduces the IOMMU Private Address (IPA) region for
guest-side IOMMU virtualization data structures.

Introduce a per-IOMMU v1 paging domain in viommu_pdom, allocate
8MB of backing memory as four 2MB subregions, map them into the
domain, and add viommu_private_space_init() /
viommu_private_space_uninit() as a matched pair.

For more information, see the "vIOMMU Private Address Space" section of
the AMD IOMMU specification [1].

[1] https://docs.amd.com/v/u/en-US/48882_3.10_PUB

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx>
---
drivers/iommu/amd/amd_iommu.h | 8 ++
drivers/iommu/amd/amd_iommu_types.h | 11 ++
drivers/iommu/amd/iommu.c | 25 ++++-
drivers/iommu/amd/viommu.c | 165 ++++++++++++++++++++++++++++
4 files changed, 204 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 2ce207529ea0..279f458becda 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -31,6 +31,7 @@ void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu,
gfp_t gfp, size_t size);
u8 __iomem * __init iommu_map_mmio_space(u64 address, u64 end);
void __init iommu_unmap_mmio_space(struct amd_iommu *iommu);
+int iommu_flush_dte(struct amd_iommu *iommu, u16 devid);

#ifdef CONFIG_AMD_IOMMU_DEBUGFS
void amd_iommu_debugfs_setup(void);
@@ -39,6 +40,8 @@ static inline void amd_iommu_debugfs_setup(void) {}
#endif

extern bool amd_iommu_viommu;
+extern const struct pt_iommu_driver_ops amd_hw_driver_ops_v1;
+extern const struct iommu_domain_ops amdv1_ops;

/* Needed for interrupt remapping */
int amd_iommu_prepare(void);
@@ -56,6 +59,8 @@ extern bool amd_iommu_hatdis;
/* Protection domain ops */
void amd_iommu_init_identity_domain(void);
struct protection_domain *protection_domain_alloc(void);
+struct iommu_domain *amd_iommu_domain_alloc_paging_v1(struct device *dev,
+ u32 flags);
struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev,
struct mm_struct *mm);
void amd_iommu_domain_free(struct iommu_domain *dom);
@@ -100,6 +105,9 @@ void amd_iommu_domain_flush_pages(struct protection_domain *domain,
void amd_iommu_dev_flush_pasid_pages(struct iommu_dev_data *dev_data,
ioasid_t pasid, u64 address, size_t size);

+int amd_iommu_flush_private_vm_region(struct amd_iommu *iommu, struct protection_domain *pdom,
+ u64 address, size_t size);
+
#ifdef CONFIG_IRQ_REMAP
int amd_iommu_create_irq_domain(struct amd_iommu *iommu);
#else
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 44fa1d6c64d6..a5e2f32590d1 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -424,6 +424,13 @@
/* For vIOMMU, the GID is 16-bit. */
#define VIOMMU_MAX_GID 0xFFFF

+/*
+ * Total IOMMU private region is 8MB (4 x 2MB-subregion)
+ */
+#define VIOMMU_PRIV_REGION_BASE (0)
+#define VIOMMU_PRIV_SUBREGION_CNT (4)
+#define VIOMMU_PRIV_SUBREGION_SIZE (0x200000) /* 2MB */
+
/* Timeout stuff */
#define LOOP_TIMEOUT 100000
#define MMIO_STATUS_TIMEOUT 2000000
@@ -814,6 +821,10 @@ struct amd_iommu {

struct ida gid_ida; /* guest IDs for this IOMMU */
bool gid_ida_inited;
+
+ /* HW vIOMMU support */
+ struct protection_domain *viommu_pdom;
+ void *viommu_priv_region[VIOMMU_PRIV_SUBREGION_CNT];
};

static inline struct amd_iommu *dev_to_amd_iommu(struct device *dev)
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 6f5ecc48f4ad..8b441f68bc47 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -1551,7 +1551,7 @@ static void domain_flush_complete(struct protection_domain *domain)
amd_iommu_completion_wait(pdom_iommu_info->iommu);
}

-static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid)
+int iommu_flush_dte(struct amd_iommu *iommu, u16 devid)
{
struct iommu_cmd cmd;

@@ -1808,6 +1808,22 @@ static int domain_flush_pages_v1(struct protection_domain *pdom,
return ret;
}

+/*
+ * Build an invalidate-pages command for @size bytes at @address using the
+ * domain id of @pdom, queue it on @iommu only, and then wait for command
+ * completion on that IOMMU.
+ *
+ * Returns 0 once the command has been queued and the wait has been issued,
+ * or the error from iommu_queue_command(). The result of
+ * amd_iommu_completion_wait() is not checked here.
+ */
+int amd_iommu_flush_private_vm_region(struct amd_iommu *iommu, struct protection_domain *pdom,
+				      u64 address, size_t size)
+{
+	struct iommu_cmd inv_cmd;
+	int err;
+
+	build_inv_iommu_pages(&inv_cmd, address, size, pdom->id, 0, false);
+
+	/* Only wait when the command actually made it into the queue. */
+	err = iommu_queue_command(iommu, &inv_cmd);
+	if (!err)
+		amd_iommu_completion_wait(iommu);
+
+	return err;
+}
+
/*
* TLB invalidation function which is called from the mapping functions.
* It flushes range of PTEs of the domain.
@@ -2726,12 +2742,12 @@ static void amd_iommu_iotlb_sync(struct iommu_domain *domain,
iommu_put_pages_list(&gather->freelist);
}

-static const struct pt_iommu_driver_ops amd_hw_driver_ops_v1 = {
+const struct pt_iommu_driver_ops amd_hw_driver_ops_v1 = {
.get_top_lock = amd_iommu_get_top_lock,
.change_top = amd_iommu_change_top,
};

-static const struct iommu_domain_ops amdv1_ops = {
+const struct iommu_domain_ops amdv1_ops = {
IOMMU_PT_DOMAIN_OPS(amdv1),
.iotlb_sync_map = amd_iommu_iotlb_sync_map,
.flush_iotlb_all = amd_iommu_flush_iotlb_all,
@@ -2746,8 +2762,7 @@ static const struct iommu_dirty_ops amdv1_dirty_ops = {
.set_dirty_tracking = amd_iommu_set_dirty_tracking,
};

-static struct iommu_domain *amd_iommu_domain_alloc_paging_v1(struct device *dev,
- u32 flags)
+struct iommu_domain *amd_iommu_domain_alloc_paging_v1(struct device *dev, u32 flags)
{
struct pt_iommu_amdv1_cfg cfg = {};
struct protection_domain *domain;
diff --git a/drivers/iommu/amd/viommu.c b/drivers/iommu/amd/viommu.c
index 9e6eb2f977ec..c5c85e82c265 100644
--- a/drivers/iommu/amd/viommu.c
+++ b/drivers/iommu/amd/viommu.c
@@ -131,6 +131,167 @@ static int __init viommu_vf_vfcntl_init(struct amd_iommu *iommu)
return -ENOMEM;
}

+/*
+ * Allocate backing pages, mark them UC, and map them at @iova in viommu_pdom.
+ *
+ * @iommu:  IOMMU whose viommu_pdom receives the mapping
+ * @iova:   address inside viommu_pdom to map the pages at
+ * @size:   number of bytes to allocate and map
+ * @gfp:    allocation flags, used for the backing pages only
+ * @out_va: receives the CPU address of the pages; NULL on any failure
+ *
+ * No IOTLB flush is issued here.
+ *
+ * Returns 0 on success, -EINVAL if @iommu or its viommu_pdom is missing,
+ * -ENOMEM if the backing pages cannot be allocated, or the error returned
+ * by set_memory_uc() / iommu_map().
+ */
+static int viommu_priv_alloc_map_flush(struct amd_iommu *iommu, u64 iova, size_t size,
+				       gfp_t gfp, void **out_va)
+{
+	int ret;
+	void *va;
+	int nid = iommu && iommu->dev ? dev_to_node(&iommu->dev->dev) : NUMA_NO_NODE;
+
+	*out_va = NULL;
+
+	if (!iommu || !iommu->viommu_pdom)
+		return -EINVAL;
+
+	va = iommu_alloc_pages_node_sz(nid, gfp, size);
+	if (!va)
+		return -ENOMEM;
+
+	/*
+	 * IOMMU spec mentions that the vIOMMU backing storage memory
+	 * should be marked as UC.
+	 */
+	ret = set_memory_uc((unsigned long)va, size >> PAGE_SHIFT);
+	if (ret)
+		goto err_free_pages;
+
+	/*
+	 * iommu_map() takes the generic IOMMU_READ / IOMMU_WRITE protection
+	 * flags rather than the AMD page-table IOMMU_PROT_* bits.
+	 * NOTE(review): the two pairs are assumed to share the same values,
+	 * so the resulting mapping permissions should be unchanged - confirm.
+	 */
+	ret = iommu_map(&iommu->viommu_pdom->domain, iova, iommu_virt_to_phys(va), size,
+			IOMMU_READ | IOMMU_WRITE, GFP_KERNEL);
+	if (ret)
+		goto cleanup_mem_attr;
+
+	*out_va = va;
+	return 0;
+
+cleanup_mem_attr:
+	/* Restore the WB attribute before handing the pages back. */
+	set_memory_wb((unsigned long)va, size >> PAGE_SHIFT);
+err_free_pages:
+	iommu_free_pages(va);
+	return ret;
+}
+
+/*
+ * Unmap @iova, flush the unmapped span on this IOMMU, WB, and free @cpu_va.
+ *
+ * @iommu:  IOMMU owning viommu_pdom
+ * @iova:   start of the mapping inside viommu_pdom
+ * @size:   number of bytes that were mapped and allocated
+ * @cpu_va: CPU address of the backing pages; NULL makes this a no-op
+ *
+ * Teardown is best effort: the pages are switched back to WB and freed even
+ * when the unmap comes up short or the flush fails; both are only logged.
+ *
+ * Returns 0, -EINVAL if @iommu or its viommu_pdom is missing, or the flush
+ * error if amd_iommu_flush_private_vm_region() fails.
+ */
+static int viommu_priv_unmap_flush_free(struct amd_iommu *iommu, u64 iova, size_t size,
+					void *cpu_va)
+{
+	size_t unmapped;
+	int ret = 0;
+
+	if (!cpu_va)
+		return 0;
+	if (!iommu || !iommu->viommu_pdom)
+		return -EINVAL;
+
+	unmapped = iommu_unmap(&iommu->viommu_pdom->domain, iova, size);
+	if (unmapped != size) {
+		/* Both values are size_t, so both are printed with %zx. */
+		pr_warn("%s: unmapped %#zx of %#zx at %#llx\n", __func__, unmapped, size, iova);
+	}
+
+	/* Only the span that was really unmapped needs invalidating. */
+	if (unmapped) {
+		ret = amd_iommu_flush_private_vm_region(iommu, iommu->viommu_pdom, iova,
+							unmapped);
+		if (ret)
+			pr_warn("%s: IOTLB flush failed (%d) for %#zx at %#llx\n",
+				__func__, ret, unmapped, iova);
+	}
+
+	set_memory_wb((unsigned long)cpu_va, size >> PAGE_SHIFT);
+	iommu_free_pages(cpu_va);
+	return ret;
+}
+
+/*
+ * Allocate one zeroed private subregion of @size bytes and map it at @base
+ * in the viommu_pdom of @iommu.
+ *
+ * Returns the CPU address of the subregion, or NULL on any failure; the
+ * underlying error code is not passed on to the caller.
+ */
+static void *alloc_private_subregion(struct amd_iommu *iommu, u64 base, size_t size)
+{
+	void *region = NULL;
+	int ret;
+
+	ret = viommu_priv_alloc_map_flush(iommu, base, size, GFP_KERNEL | __GFP_ZERO, &region);
+	if (ret)
+		return NULL;
+
+	/* size is a size_t, so it is printed with %zx. */
+	pr_debug("%s: base=%#llx, size=%#zx, subregion=%#llx(%#llx)\n",
+		 __func__, base, size, (unsigned long long)region, iommu_virt_to_phys(region));
+
+	return region;
+}
+
+/*
+ * Release every populated private subregion of @iommu, then free the
+ * private paging domain and clear viommu_pdom. Does nothing when no
+ * private domain is set.
+ *
+ * Teardown continues past individual subregion errors; the first error seen
+ * is reported once at the end.
+ */
+static void viommu_private_space_uninit(struct amd_iommu *iommu)
+{
+	int idx, err, saved_err = 0;
+
+	if (!iommu->viommu_pdom)
+		return;
+
+	for (idx = 0; idx < VIOMMU_PRIV_SUBREGION_CNT; idx++) {
+		void *va = iommu->viommu_priv_region[idx];
+		u64 iova;
+
+		/* Slots that were never populated have nothing to undo. */
+		if (!va)
+			continue;
+
+		iova = VIOMMU_PRIV_REGION_BASE + (idx * VIOMMU_PRIV_SUBREGION_SIZE);
+		err = viommu_priv_unmap_flush_free(iommu, iova, VIOMMU_PRIV_SUBREGION_SIZE, va);
+		/* Latch only the first non-zero result. */
+		if (!saved_err)
+			saved_err = err;
+		iommu->viommu_priv_region[idx] = NULL;
+	}
+
+	amd_iommu_domain_free(&iommu->viommu_pdom->domain);
+	iommu->viommu_pdom = NULL;
+
+	if (saved_err)
+		pr_err("%s: private subregion teardown failed (%d)\n", __func__, saved_err);
+}
+
+/*
+ * Create the private v1 paging domain of @iommu and back the private region
+ * with VIOMMU_PRIV_SUBREGION_CNT subregions of VIOMMU_PRIV_SUBREGION_SIZE
+ * bytes each, mapped back to back starting at VIOMMU_PRIV_REGION_BASE.
+ *
+ * If a subregion cannot be set up, everything done so far is undone through
+ * viommu_private_space_uninit().
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int viommu_private_space_init(struct amd_iommu *iommu)
+{
+	int i;
+	u64 base;
+	struct iommu_domain *dom;
+	struct protection_domain *pdom;
+	struct pt_iommu_amdv1_hw_info pt_info;
+
+	/*
+	 * Setup page table root pointer, Guest MMIO and
+	 * Cmdbuf Dirty Status regions.
+	 */
+	dom = amd_iommu_domain_alloc_paging_v1(&iommu->dev->dev, 0);
+	/*
+	 * IOMMU domain allocators conventionally report failure through
+	 * ERR_PTR(), which a plain NULL test would accept as a valid domain.
+	 * Handle both conventions and pass the real error code on.
+	 * NOTE(review): confirm the allocator's failure convention.
+	 */
+	if (IS_ERR_OR_NULL(dom)) {
+		pr_err("%s: Failed to initialize private space\n", __func__);
+		return dom ? PTR_ERR(dom) : -ENOMEM;
+	}
+
+	pdom = to_pdomain(dom);
+	iommu->viommu_pdom = pdom;
+
+	/*
+	 * The private region requires 8MB of memory to be allocated
+	 * and mapped. Split the region into 4 x 2MB subregions.
+	 */
+	for (i = 0; i < VIOMMU_PRIV_SUBREGION_CNT; i++) {
+		base = VIOMMU_PRIV_REGION_BASE + (i * VIOMMU_PRIV_SUBREGION_SIZE);
+		iommu->viommu_priv_region[i] = alloc_private_subregion(iommu, base,
+								       VIOMMU_PRIV_SUBREGION_SIZE);
+		if (!iommu->viommu_priv_region[i]) {
+			pr_err("%s: Failed to allocate vIOMMU private subregion %d\n", __func__, i);
+			/* Frees the subregions mapped so far and the domain. */
+			viommu_private_space_uninit(iommu);
+			return -ENOMEM;
+		}
+	}
+
+	/* The hardware info is fetched for the debug message only. */
+	pt_iommu_amdv1_hw_info(&pdom->amdv1, &pt_info);
+	pr_debug("%s: devid=%#x, pte_root=%#llx\n",
+		 __func__, iommu->devid,
+		 (unsigned long long)pt_info.host_pt_root);
+
+	return 0;
+}
+
/*
* Returns VF MMIO BAR offset for the give guest ID which will be
* mapped to guest vIOMMU 3rd 4K MMIO address
@@ -160,5 +321,9 @@ int __init amd_viommu_init(struct amd_iommu *iommu)

amd_viommu_gid_ida_init(iommu);

+ ret = viommu_private_space_init(iommu);
+ if (ret)
+ return ret;
+
return 0;
}
--
2.34.1