[PATCH v5 11/14] iommu/amd: Introduce gDomID-to-hDomID Mapping and handle parent domain invalidation

From: Suravee Suthikulpanit

Date: Wed Nov 12 2025 - 13:26:34 EST


Each nested domain is assigned a guest domain ID (gDomID), which the guest OS
programs into the guest Device Table Entry (gDTE). For each gDomID, the driver
assigns a corresponding host domain ID (hDomID), which is programmed
into the host Device Table Entry (hDTE).

The hDomID is allocated during amd_iommu_alloc_domain_nested(),
and freed during nested_domain_free(). The gDomID-to-hDomID mapping info
(struct guest_domain_mapping_info) is stored in a per-viommu xarray
(struct amd_iommu_viommu.gdomid_array), which is indexed by gDomID.

Note also that a nest parent domain can be shared among multiple struct
iommufd_viommu instances. Therefore, when the hypervisor invalidates the nest
parent domain, the AMD IOMMU command INVALIDATE_IOMMU_PAGES must be issued for
each hDomID in the gdomid_array of every vIOMMU that uses the domain. This is
handled by iommu_flush_pages_v1_hdom_ids(), which iterates through
struct protection_domain.viommu_list.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx>
---
drivers/iommu/amd/amd_iommu_types.h | 23 +++++++++
drivers/iommu/amd/iommu.c | 35 +++++++++++++
drivers/iommu/amd/iommufd.c | 34 ++++++++++++
drivers/iommu/amd/nested.c | 80 +++++++++++++++++++++++++++++
4 files changed, 172 insertions(+)

diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index e46f346fd6c5..734f6a753b3a 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -498,6 +498,22 @@ struct pdom_iommu_info {
struct amd_iommu_viommu {
struct iommufd_viommu core;
struct protection_domain *parent; /* nest parent domain for this viommu */
+ /*
+ * Entry in the nest parent's protection_domain->viommu_list.
+ * Linked by amd_iommufd_viommu_init() and unlinked by
+ * amd_iommufd_viommu_destroy(), both under parent->lock.
+ */
+ struct list_head pdom_list;
+
+ /*
+ * Per-vIOMMU guest domain ID to host domain ID mapping.
+ * Indexed by guest domain ID; each entry is a pointer to a
+ * struct guest_domain_mapping_info. Entries are inserted by
+ * amd_iommu_alloc_domain_nested() and removed by nested_domain_free().
+ */
+ struct xarray gdomid_array;
+};
+
+/*
+ * Contains guest domain ID mapping info,
+ * which is stored in the struct xarray gdomid_array.
+ *
+ * One instance exists per guest domain ID in a vIOMMU and is shared by
+ * every nested domain allocated with that guest domain ID.
+ */
+struct guest_domain_mapping_info {
+ /* Number of nested domains currently referencing this mapping */
+ refcount_t users;
+ u32 hdom_id; /* Host domain ID */
};

/*
@@ -506,6 +522,7 @@ struct amd_iommu_viommu {
struct nested_domain {
struct iommu_domain domain; /* generic domain handle used by iommu core code */
u16 gdom_id; /* domain ID from gDTE */
+ /*
+ * gDomID-to-hDomID mapping entry for gdom_id. Reference-counted and
+ * shared with other nested domains of the same vIOMMU that use the
+ * same gdom_id.
+ */
+ struct guest_domain_mapping_info *gdom_info;
struct iommu_hwpt_amd_guest gdte; /* Guest vIOMMU DTE */
struct amd_iommu_viommu *viommu; /* AMD hw-viommu this nested domain belong to */
};
@@ -530,6 +547,12 @@ struct protection_domain {

struct mmu_notifier mn; /* mmu notifier for the SVA domain */
struct list_head dev_data_list; /* List of pdom_dev_data */
+
+ /*
+ * Store reference to list of vIOMMUs, which use this protection domain.
+ * This will be used to look up host domain ID when flushing this domain.
+ */
+ struct list_head viommu_list;
};
PT_IOMMU_CHECK_DOMAIN(struct protection_domain, iommu, domain);
PT_IOMMU_CHECK_DOMAIN(struct protection_domain, amdv1.iommu, domain);
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 724b8723b836..6a26e7a28141 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -1513,6 +1513,29 @@ static void amd_iommu_flush_tlb_domid(struct amd_iommu *iommu, u32 dom_id)
iommu_completion_wait(iommu);
}

+/*
+ * Queue an INVALIDATE_IOMMU_PAGES command for every host domain ID (hDomID)
+ * mapped from a guest domain ID in any vIOMMU that uses @pdom as its nest
+ * parent.
+ *
+ * @pdom:    nest parent protection domain being flushed
+ * @address: start of the range to invalidate
+ * @size:    size of the range to invalidate
+ *
+ * Returns the bitwise OR of all iommu_queue_command() results, i.e. zero
+ * only if every command was queued successfully.
+ *
+ * NOTE(review): the walk of pdom->viommu_list assumes the caller holds
+ * pdom->lock, which is what protects list_add()/list_del() on that list
+ * - confirm against domain_flush_pages_v1() callers.
+ */
+static int iommu_flush_pages_v1_hdom_ids(struct protection_domain *pdom, u64 address, size_t size)
+{
+	int ret = 0;
+	struct amd_iommu_viommu *aviommu;
+
+	list_for_each_entry(aviommu, &pdom->viommu_list, pdom_list) {
+		unsigned long i;
+		struct guest_domain_mapping_info *gdom_info;
+		struct amd_iommu *iommu = container_of(aviommu->core.iommu_dev,
+						       struct amd_iommu, iommu);
+
+		/*
+		 * Hold the xarray lock across the walk: the mapping entries
+		 * are erased with xa_cmpxchg() (which takes this lock) and
+		 * then freed, so without it gdom_info could be freed while
+		 * it is being dereferenced below.
+		 */
+		xa_lock(&aviommu->gdomid_array);
+		xa_for_each(&aviommu->gdomid_array, i, gdom_info) {
+			struct iommu_cmd cmd;
+
+			pr_debug("%s: iommu=%#x, hdom_id=%#x\n", __func__,
+				 iommu->devid, gdom_info->hdom_id);
+			build_inv_iommu_pages(&cmd, address, size, gdom_info->hdom_id,
+					      IOMMU_NO_PASID, false);
+			ret |= iommu_queue_command(iommu, &cmd);
+		}
+		xa_unlock(&aviommu->gdomid_array);
+	}
+	return ret;
+}
+
static void amd_iommu_flush_all(struct amd_iommu *iommu)
{
struct iommu_cmd cmd;
@@ -1661,6 +1684,17 @@ static int domain_flush_pages_v1(struct protection_domain *pdom,
ret |= iommu_queue_command(pdom_iommu_info->iommu, &cmd);
}

+ /*
+ * A domain w/ v1 table can be a nest parent, which can have
+ * multiple nested domains. Each nested domain has 1:1 mapping
+ * between gDomID and hDomID. Therefore, flush every hDomID
+ * associated to this nest parent domain.
+ *
+ * See drivers/iommu/amd/nested.c: amd_iommu_alloc_domain_nested()
+ */
+ if (!list_empty(&pdom->viommu_list))
+ ret |= iommu_flush_pages_v1_hdom_ids(pdom, address, size);
+
return ret;
}

@@ -2469,6 +2503,7 @@ static void protection_domain_init(struct protection_domain *domain)
spin_lock_init(&domain->lock);
INIT_LIST_HEAD(&domain->dev_list);
INIT_LIST_HEAD(&domain->dev_data_list);
+ INIT_LIST_HEAD(&domain->viommu_list);
xa_init(&domain->iommu_array);
}

diff --git a/drivers/iommu/amd/iommufd.c b/drivers/iommu/amd/iommufd.c
index eb6119bdcf12..bb53475f9171 100644
--- a/drivers/iommu/amd/iommufd.c
+++ b/drivers/iommu/amd/iommufd.c
@@ -9,6 +9,8 @@
#include "amd_iommu.h"
#include "amd_iommu_types.h"

+static const struct iommufd_viommu_ops amd_viommu_ops;
+
void *amd_iommufd_hw_info(struct device *dev, u32 *length, u32 *type)
{
struct iommu_hw_info_amd *hwinfo;
@@ -38,10 +40,42 @@ size_t amd_iommufd_get_viommu_size(struct device *dev, enum iommu_viommu_type vi
int amd_iommufd_viommu_init(struct iommufd_viommu *viommu, struct iommu_domain *parent,
const struct iommu_user_data *user_data)
{
+ unsigned long flags;
struct protection_domain *pdom = to_pdomain(parent);
struct amd_iommu_viommu *aviommu = container_of(viommu, struct amd_iommu_viommu, core);

+ /* Start with an empty gDomID-to-hDomID map for this vIOMMU */
+ xa_init(&aviommu->gdomid_array);
aviommu->parent = pdom;

+ /* Install the AMD callbacks so that destroy can undo the list linkage below */
+ viommu->ops = &amd_viommu_ops;
+
+ /*
+ * Link this vIOMMU onto the nest parent's viommu_list so that a flush
+ * of the parent domain also reaches the hDomIDs tracked in
+ * gdomid_array. The list is modified under pdom->lock.
+ */
+ spin_lock_irqsave(&pdom->lock, flags);
+ list_add(&aviommu->pdom_list, &pdom->viommu_list);
+ spin_unlock_irqrestore(&pdom->lock, flags);
+
return 0;
}
+
+/*
+ * Undo amd_iommufd_viommu_init(): unlink the vIOMMU from its nest parent's
+ * viommu_list and release the gDomID-to-hDomID xarray.
+ */
+static void amd_iommufd_viommu_destroy(struct iommufd_viommu *viommu)
+{
+	unsigned long flags;
+	struct amd_iommu_viommu *aviommu = container_of(viommu, struct amd_iommu_viommu, core);
+	struct protection_domain *pdom = aviommu->parent;
+
+	/*
+	 * amd_iommufd_viommu_init() always links pdom_list onto
+	 * pdom->viommu_list, so it can be unlinked directly without
+	 * searching the list for it.
+	 */
+	spin_lock_irqsave(&pdom->lock, flags);
+	list_del(&aviommu->pdom_list);
+	spin_unlock_irqrestore(&pdom->lock, flags);
+
+	/*
+	 * Release the xarray's internal storage.
+	 * NOTE(review): this does not free the guest_domain_mapping_info
+	 * entries themselves; they are expected to have been removed by
+	 * nested_domain_free() already - confirm that nested domains cannot
+	 * outlive their vIOMMU.
+	 */
+	xa_destroy(&aviommu->gdomid_array);
+}
+
+/*
+ * See include/linux/iommufd.h
+ * struct iommufd_viommu_ops - vIOMMU specific operations
+ *
+ * Installed on each vIOMMU by amd_iommufd_viommu_init(). Only the destroy
+ * callback is provided here.
+ */
+static const struct iommufd_viommu_ops amd_viommu_ops = {
+ .destroy = amd_iommufd_viommu_destroy,
+};
diff --git a/drivers/iommu/amd/nested.c b/drivers/iommu/amd/nested.c
index dd3e53dd16ea..1bbcb16abecc 100644
--- a/drivers/iommu/amd/nested.c
+++ b/drivers/iommu/amd/nested.c
@@ -6,6 +6,7 @@
#define dev_fmt(fmt) "AMD-Vi: " fmt

#include <linux/iommu.h>
+#include <linux/refcount.h>
#include <uapi/linux/iommufd.h>

#include "amd_iommu.h"
@@ -68,6 +69,7 @@ amd_iommu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
{
int ret;
struct nested_domain *ndom;
+ struct guest_domain_mapping_info *gdom_info, *curr;
struct amd_iommu_viommu *aviommu = container_of(viommu, struct amd_iommu_viommu, core);

if (user_data->type != IOMMU_HWPT_DATA_AMD_GUEST)
@@ -92,7 +94,60 @@ amd_iommu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
ndom->domain.type = IOMMU_DOMAIN_NESTED;
ndom->viommu = aviommu;

+ gdom_info = kzalloc(sizeof(*gdom_info), GFP_KERNEL);
+ if (!gdom_info)
+ goto out_err;
+
+ /*
+ * Normally, when a guest has multiple pass-through devices,
+ * the IOMMU driver setup DTEs with the same stage-2 table and
+ * use the same host domain ID (hDomId). In case of nested translation,
+ * if the guest setup different stage-1 tables with same PASID,
+ * IOMMU would use the same TLB tag. This will results in TLB
+ * aliasing issue.
+ *
+ * The guest is assigning gDomIDs based on its own algorithm for managing
+ * cache tags of (DomID, PASID). Within a single viommu, the nest parent domain
+ * (w/ S2 table) is used by all DTEs. But we need to consistently map the gDomID
+ * to a single hDomID. This is done using an xarray in the vIOMMU to
+ * keep track of the gDomID mapping. When the S2 is changed, the INVALIDATE_IOMMU_PAGES
+ * command must be issued for each hDomID in the xarray.
+ */
+ curr = xa_cmpxchg(&aviommu->gdomid_array,
+ ndom->gdom_id, NULL, gdom_info, GFP_ATOMIC);
+ if (curr) {
+ if (xa_err(curr)) {
+ ret = -EINVAL;
+ goto out_err_gdom_info;
+ } else {
+ /* The gDomID already exist */
+ pr_debug("%s: Found gdom_id=%#x, hdom_id=%#x\n",
+ __func__, ndom->gdom_id, curr->hdom_id);
+ refcount_inc(&curr->users);
+ ndom->gdom_info = curr;
+ kfree(gdom_info);
+ return &ndom->domain;
+ }
+ }
+
+ /* The gDomID does not exist. We allocate new hdom_id */
+ gdom_info->hdom_id = amd_iommu_pdom_id_alloc();
+ if (gdom_info->hdom_id <= 0) {
+ xa_cmpxchg(&aviommu->gdomid_array,
+ ndom->gdom_id, gdom_info, NULL, GFP_ATOMIC);
+ ret = -ENOSPC;
+ goto out_err_gdom_info;
+ }
+
+ refcount_set(&gdom_info->users, 1);
+ ndom->gdom_info = gdom_info;
+ pr_debug("%s: Allocate gdom_id=%#x, hdom_id=%#x\n",
+ __func__, ndom->gdom_id, gdom_info->hdom_id);
+
return &ndom->domain;
+
+out_err_gdom_info:
+ kfree(gdom_info);
out_err:
kfree(ndom);
return ERR_PTR(ret);
@@ -100,8 +155,33 @@ amd_iommu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,

static void nested_domain_free(struct iommu_domain *dom)
{
+ struct guest_domain_mapping_info *curr;
struct nested_domain *ndom = to_ndomain(dom);
+ struct amd_iommu_viommu *aviommu = ndom->viommu;
+
+ if (!refcount_dec_and_test(&ndom->gdom_info->users))
+ return;

+ /*
+ * The refcount for the gdom_id to hdom_id mapping is zero.
+ * It is now safe to remove the mapping.
+ */
+ curr = xa_cmpxchg(&aviommu->gdomid_array, ndom->gdom_id,
+ ndom->gdom_info, NULL, GFP_ATOMIC);
+ if (curr) {
+ if (xa_err(curr)) {
+ pr_err("%s: Failed to free nested domain gdom_id=%#x\n",
+ __func__, ndom->gdom_id);
+ return;
+ }
+
+ /* success */
+ pr_debug("%s: Free gdom_id=%#x, hdom_id=%#x\n",
+ __func__, ndom->gdom_id, curr->hdom_id);
+ kfree(curr);
+ }
+
+ amd_iommu_pdom_id_free(ndom->gdom_info->hdom_id);
kfree(ndom);
}

--
2.34.1