[PATCH v2 11/26] iommu/amd: Allocate and map vIOMMU private regions

From: Suravee Suthikulpanit

Date: Thu May 28 2026 - 01:19:58 EST


The AMD IOMMU Private Address (IPA) region is allocated and mapped during
IOMMU driver initialization. According to the specification, 8MB is needed.
Since the hardware does not require the IPA region to be physically
contiguous, split it into four 2MB subregions to match hugepage granularity
and create a mapping for each subregion in the v1 page table of each IOMMU.
Also add amd_iommu_flush_private_vm_region(), which is used to flush each
subregion after it has been mapped.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx>
---
drivers/iommu/amd/amd_iommu.h | 3 ++
drivers/iommu/amd/amd_iommu_types.h | 8 +++
drivers/iommu/amd/iommu.c | 16 ++++++
drivers/iommu/amd/viommu.c | 79 +++++++++++++++++++++++++++--
4 files changed, 101 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 1aa79a26a127..279f458becda 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -105,6 +105,9 @@ void amd_iommu_domain_flush_pages(struct protection_domain *domain,
void amd_iommu_dev_flush_pasid_pages(struct iommu_dev_data *dev_data,
ioasid_t pasid, u64 address, size_t size);

+int amd_iommu_flush_private_vm_region(struct amd_iommu *iommu, struct protection_domain *pdom,
+ u64 address, size_t size); /* defined in iommu.c; queues a flush of @size bytes at @address for @pdom on @iommu */
+
#ifdef CONFIG_IRQ_REMAP
int amd_iommu_create_irq_domain(struct amd_iommu *iommu);
#else
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 02d359f09148..a5e2f32590d1 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -424,6 +424,13 @@
/* For vIOMMU, the GID is 16-bit. */
#define VIOMMU_MAX_GID 0xFFFF

+/*
+ * The vIOMMU private region totals 8MB and is handled as four 2MB subregions.
+ */
+#define VIOMMU_PRIV_REGION_BASE (0) /* address at which subregion 0 is mapped */
+#define VIOMMU_PRIV_SUBREGION_CNT (4) /* number of subregions per IOMMU */
+#define VIOMMU_PRIV_SUBREGION_SIZE (0x200000) /* 2MB */
+
/* Timeout stuff */
#define LOOP_TIMEOUT 100000
#define MMIO_STATUS_TIMEOUT 2000000
@@ -817,6 +824,7 @@ struct amd_iommu {

/* HW vIOMMU support */
struct protection_domain *viommu_pdom;
+ void *viommu_priv_region[VIOMMU_PRIV_SUBREGION_CNT];
};

static inline struct amd_iommu *dev_to_amd_iommu(struct device *dev)
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index d89664eba898..8b441f68bc47 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -1808,6 +1808,22 @@ static int domain_flush_pages_v1(struct protection_domain *pdom,
return ret;
}

+/*
+ * Queue a flush of @size bytes at @address in @pdom on @iommu.
+ *
+ * A command is built with build_inv_iommu_pages() for @pdom->id, queued on
+ * @iommu, and followed by amd_iommu_completion_wait().
+ *
+ * Returns 0 once the command has been queued, or the error reported by
+ * iommu_queue_command(), in which case no completion wait is issued.
+ */
+int amd_iommu_flush_private_vm_region(struct amd_iommu *iommu, struct protection_domain *pdom,
+				      u64 address, size_t size)
+{
+	struct iommu_cmd inv_cmd;
+	int err;
+
+	build_inv_iommu_pages(&inv_cmd, address, size, pdom->id, 0, false);
+
+	err = iommu_queue_command(iommu, &inv_cmd);
+	if (!err)
+		amd_iommu_completion_wait(iommu);
+
+	return err;
+}
+
/*
* TLB invalidation function which is called from the mapping functions.
* It flushes range of PTEs of the domain.
diff --git a/drivers/iommu/amd/viommu.c b/drivers/iommu/amd/viommu.c
index 14426649074f..90ed2eb92aeb 100644
--- a/drivers/iommu/amd/viommu.c
+++ b/drivers/iommu/amd/viommu.c
@@ -131,8 +131,66 @@ static int __init viommu_vf_vfcntl_init(struct amd_iommu *iommu)
return -ENOMEM;
}

+/*
+ * Allocate one vIOMMU private subregion and map it into the private domain.
+ *
+ * @iommu: IOMMU that owns the private domain (iommu->viommu_pdom)
+ * @base:  address in the private domain at which the subregion is mapped
+ * @size:  subregion size in bytes
+ *
+ * The pages are allocated zeroed, on the IOMMU's NUMA node when a device is
+ * known, switched to uncached with set_memory_uc(), mapped read/write at
+ * @base, and the mapped range is then flushed on @iommu.
+ *
+ * Returns the kernel virtual address of the subregion, or NULL on failure.
+ * On failure every step already taken is undone.
+ */
+static void *alloc_private_subregion(struct amd_iommu *iommu, u64 base, size_t size)
+{
+	int ret;
+	int nid;
+	void *region;
+
+	/* The domain is dereferenced below, so reject a missing one up front. */
+	if (!iommu || !iommu->viommu_pdom)
+		return NULL;
+
+	nid = iommu->dev ? dev_to_node(&iommu->dev->dev) : NUMA_NO_NODE;
+
+	region = iommu_alloc_pages_node_sz(nid, GFP_KERNEL | __GFP_ZERO, size);
+	if (!region)
+		return NULL;
+
+	ret = set_memory_uc((unsigned long)region, size >> PAGE_SHIFT);
+	if (ret)
+		goto err_out;
+
+	ret = iommu_map(&iommu->viommu_pdom->domain, base,
+			iommu_virt_to_phys(region), size,
+			IOMMU_PROT_IR | IOMMU_PROT_IW, GFP_KERNEL);
+	if (ret)
+		goto cleanup_mem_attr;
+
+	/* A failed flush must not be ignored: undo the mapping and bail out. */
+	ret = amd_iommu_flush_private_vm_region(iommu, iommu->viommu_pdom, base, size);
+	if (ret)
+		goto cleanup_map;
+
+	/* size is a size_t, so it is printed with the 'z' length modifier. */
+	pr_debug("%s: base=%#llx, size=%#zx, subregion=%#llx(%#llx)\n",
+		 __func__, base, size, (unsigned long long)region, iommu_virt_to_phys(region));
+
+	return region;
+
+cleanup_map:
+	iommu_unmap(&iommu->viommu_pdom->domain, base, size);
+cleanup_mem_attr:
+	set_memory_wb((unsigned long)region, size >> PAGE_SHIFT);
+err_out:
+	iommu_free_pages(region);
+	return NULL;
+}
+
+/*
+ * Tear down the vIOMMU private address space of @iommu.
+ *
+ * Every allocated subregion is unmapped from the private domain, flushed,
+ * switched back to write-back and freed; the private domain is freed last.
+ * Subregion slots that are NULL are skipped, which is what lets
+ * viommu_private_space_init() call this after a partial allocation.
+ * Does nothing when no private domain exists.
+ */
+static void viommu_private_space_uninit(struct amd_iommu *iommu)
+{
+	int i;
+	u64 base;
+	void *region;
+	struct protection_domain *pdom = iommu->viommu_pdom;
+
+	if (!pdom)
+		return;
+
+	for (i = 0; i < VIOMMU_PRIV_SUBREGION_CNT; i++) {
+		region = iommu->viommu_priv_region[i];
+		if (!region)
+			continue;
+
+		/*
+		 * Remove the translation and flush it before the backing pages
+		 * are released, so the pages are not freed while still mapped.
+		 * This is teardown, so a failed flush is not acted upon.
+		 */
+		base = VIOMMU_PRIV_REGION_BASE + ((u64)i * VIOMMU_PRIV_SUBREGION_SIZE);
+		iommu_unmap(&pdom->domain, base, VIOMMU_PRIV_SUBREGION_SIZE);
+		amd_iommu_flush_private_vm_region(iommu, pdom, base,
+						  VIOMMU_PRIV_SUBREGION_SIZE);
+
+		set_memory_wb((unsigned long)region,
+			      VIOMMU_PRIV_SUBREGION_SIZE >> PAGE_SHIFT);
+		iommu_free_pages(region);
+		iommu->viommu_priv_region[i] = NULL;
+	}
+
+	amd_iommu_domain_free(&pdom->domain);
+	iommu->viommu_pdom = NULL;
+}
+
static int viommu_private_space_init(struct amd_iommu *iommu)
{
+ int i;
+ u64 base;
struct iommu_domain *dom;
struct protection_domain *pdom;
struct pt_iommu_amdv1_hw_info pt_info;
@@ -144,22 +202,33 @@ static int viommu_private_space_init(struct amd_iommu *iommu)
dom = amd_iommu_domain_alloc_paging_v1(&iommu->dev->dev, 0);
if (!dom) {
pr_err("%s: Failed to initialize private space\n", __func__);
- goto err_out;
+ return -ENOMEM;
}

pdom = to_pdomain(dom);
iommu->viommu_pdom = pdom;

+ /*
+ * Each private region requires 8MB of memory to be allocated
+ * and mapped. Split the region into 4 x 2MB subregions.
+ */
+ for (i = 0; i < VIOMMU_PRIV_SUBREGION_CNT; i++) {
+ base = VIOMMU_PRIV_REGION_BASE + (i * VIOMMU_PRIV_SUBREGION_SIZE);
+ iommu->viommu_priv_region[i] = alloc_private_subregion(iommu, base,
+ VIOMMU_PRIV_SUBREGION_SIZE);
+ if (!iommu->viommu_priv_region[i]) {
+ pr_err("%s: Failed to allocate vIOMMU private subregion %d\n", __func__, i);
+ viommu_private_space_uninit(iommu);
+ return -ENOMEM;
+ }
+ }
+
pt_iommu_amdv1_hw_info(&pdom->amdv1, &pt_info);
pr_debug("%s: devid=%#x, pte_root=%#llx\n",
__func__, iommu->devid,
(unsigned long long)pt_info.host_pt_root);

return 0;
-err_out:
- if (dom)
- amd_iommu_domain_free(dom);
- return -ENOMEM;
}

/*
--
2.34.1