[PATCH v2 19/26] iommu/amd: Add per-segment translate device ID pool

From: Suravee Suthikulpanit

Date: Thu May 28 2026 - 01:20:45 EST


Track translate-device-id slots per PCI segment so real PCI device IDs can
be reserved for normal DTE programming and excluded from dynamic allocation
for vIOMMU translation DTEs.

Add amd_iommu_pci_seg_trans_devid_init() and
amd_iommu_pci_seg_trans_devid_fini(), called during segment setup and
teardown respectively, and amd_iommu_trans_devid_reserve() for attach-time
reservation. The xarray-backed state tracking is implemented in the new
trans_devid.c.

Call the reserve hook from amd_iommu_attach_device() before programming
the DTE.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx>
---
drivers/iommu/amd/Makefile | 2 +-
drivers/iommu/amd/amd_iommu.h | 12 +++++
drivers/iommu/amd/amd_iommu_types.h | 17 ++++++
drivers/iommu/amd/init.c | 3 ++
drivers/iommu/amd/iommu.c | 12 +++++
drivers/iommu/amd/trans_devid.c | 80 +++++++++++++++++++++++++++++
6 files changed, 125 insertions(+), 1 deletion(-)
create mode 100644 drivers/iommu/amd/trans_devid.c

diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile
index e1e824b9c7b0..12c3fe83e4ce 100644
--- a/drivers/iommu/amd/Makefile
+++ b/drivers/iommu/amd/Makefile
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-y += iommu.o init.o quirks.o ppr.o pasid.o
-obj-$(CONFIG_AMD_IOMMU_IOMMUFD) += iommufd.o nested.o viommu.o
+obj-$(CONFIG_AMD_IOMMU_IOMMUFD) += iommufd.o nested.o viommu.o trans_devid.o
obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index d1640181b292..d411bc326241 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -216,6 +216,18 @@ void amd_iommu_update_dte(struct amd_iommu *iommu,
struct dev_table_entry *new);
int amd_iommu_completion_wait(struct amd_iommu *iommu);

+/* Per-segment translate-device-id pool; no-op stubs when IOMMUFD support is off */
+#ifdef CONFIG_AMD_IOMMU_IOMMUFD
+void amd_iommu_pci_seg_trans_devid_init(struct amd_iommu_pci_seg *pci_seg);
+void amd_iommu_pci_seg_trans_devid_fini(struct amd_iommu_pci_seg *pci_seg);
+int amd_iommu_trans_devid_reserve(struct amd_iommu_pci_seg *pci_seg, u16 id);
+#else /* !CONFIG_AMD_IOMMU_IOMMUFD: no pool exists, so every call trivially succeeds */
+static inline void amd_iommu_pci_seg_trans_devid_init(struct amd_iommu_pci_seg *pci_seg) { }
+static inline void amd_iommu_pci_seg_trans_devid_fini(struct amd_iommu_pci_seg *pci_seg) { }
+static inline int
+amd_iommu_trans_devid_reserve(struct amd_iommu_pci_seg *pci_seg, u16 id) { return 0; }
+#endif /* CONFIG_AMD_IOMMU_IOMMUFD */
+
void amd_iommu_set_translate_dte(struct amd_iommu *iommu, u16 gid,
struct protection_domain *pdom,
u32 devid);
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 2d7bc791dbd9..ffa338c8735f 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -615,6 +615,14 @@ PT_IOMMU_CHECK_DOMAIN(struct protection_domain, iommu, domain);
PT_IOMMU_CHECK_DOMAIN(struct protection_domain, amdv1.iommu, domain);
PT_IOMMU_CHECK_DOMAIN(struct protection_domain, amdv2.iommu, domain);

+#ifdef CONFIG_AMD_IOMMU_IOMMUFD
+enum trans_devid_state {
+	TRANS_DEVID_FREE	= 0,	/* an empty xarray slot reads as free */
+	TRANS_DEVID_RESERVED	= 1,	/* stored by amd_iommu_trans_devid_reserve() */
+	TRANS_DEVID_ALLOCATED	= 2,	/* reserving such an id fails with -EBUSY */
+};
+#endif /* CONFIG_AMD_IOMMU_IOMMUFD */
+
/*
* This structure contains information about one PCI segment in the system.
*/
@@ -676,6 +684,15 @@ struct amd_iommu_pci_seg {
* parsing time.
*/
struct list_head unity_map;
+
+#ifdef CONFIG_AMD_IOMMU_IOMMUFD
+ /*
+ * Per-segment translate-device-id allocation. The xarray is indexed by
+ * the translate-device-id. The value is the state (enum trans_devid_state).
+ */
+ struct mutex trans_devid_mutex;
+ struct xarray trans_devid_xa;
+#endif
};

/*
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 6e69b3dd8b1e..622bc0337eda 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -1737,6 +1737,8 @@ static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id,
if (alloc_rlookup_table(pci_seg))
goto err_free_alias_table;

+ amd_iommu_pci_seg_trans_devid_init(pci_seg);
+
return pci_seg;

err_free_alias_table:
@@ -1768,6 +1770,7 @@ static void __init free_pci_segments(void)

for_each_pci_segment_safe(pci_seg, next) {
list_del(&pci_seg->list);
+ amd_iommu_pci_seg_trans_devid_fini(pci_seg);
free_irq_lookup_table(pci_seg);
free_rlookup_table(pci_seg);
free_alias_table(pci_seg);
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 6c4c4f62ddde..2600af84c8ca 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -3055,6 +3055,18 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, struct device *dev,
if (dom->dirty_ops && !amd_iommu_hd_support(iommu))
return -EINVAL;

+#if IS_ENABLED(CONFIG_AMD_IOMMU_IOMMUFD)
+	/*
+	 * Reserve the translate-device-id before setting up the DTE for the device;
+	 * this fails if the id was already allocated (amd_iommu_trans_devid_alloc()).
+	 */
+ ret = amd_iommu_trans_devid_reserve(iommu->pci_seg, dev_data->devid);
+ if (ret) {
+ pr_err("%s: Failed to reserve device id %#x\n", __func__, dev_data->devid);
+ return ret;
+ }
+#endif
+
if (dev_data->domain)
detach_device(dev);

diff --git a/drivers/iommu/amd/trans_devid.c b/drivers/iommu/amd/trans_devid.c
new file mode 100644
index 000000000000..f15cbaae9118
--- /dev/null
+++ b/drivers/iommu/amd/trans_devid.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025 Advanced Micro Devices, Inc.
+ *
+ * AMD vIOMMU translate-device-id pool per PCI segment.
+ */
+
+#include <linux/kernel.h>
+#include <linux/xarray.h>
+
+#include "amd_iommu.h"
+
+static inline enum trans_devid_state trans_devid_xa_get_state(void *entry)
+{
+	/* An empty slot, or (with a warning) a non-value entry, reads as free. */
+	if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
+		return TRANS_DEVID_FREE;
+
+	return (enum trans_devid_state)xa_to_value(entry);
+}
+
+static inline void *trans_devid_xa_mk_state(enum trans_devid_state state)
+{
+	return xa_mk_value((unsigned long)state);	/* state as an xarray value entry */
+}
+
+void amd_iommu_pci_seg_trans_devid_init(struct amd_iommu_pci_seg *pci_seg)
+{
+	xa_init(&pci_seg->trans_devid_xa);
+	mutex_init(&pci_seg->trans_devid_mutex);
+}
+
+void amd_iommu_pci_seg_trans_devid_fini(struct amd_iommu_pci_seg *pci_seg)
+{
+ xa_destroy(&pci_seg->trans_devid_xa); /* NOTE(review): trans_devid_mutex gets no mutex_destroy() */
+}
+
+/**
+ * amd_iommu_trans_devid_reserve - occupy @id so it is never returned by alloc
+ * @pci_seg: PCI segment whose translate-device-id pool is updated
+ * @id: device id to mark as reserved
+ *
+ * Called when attaching a device to a domain (see amd_iommu_attach_device()).
+ *
+ * Note: PCI hot-plug devices are enumerated at runtime, so their ids can clash
+ * with translate-device-id allocation; in that case this function fails with
+ * %-EBUSY. Reserving the hot-plug id range, if it is known in advance, avoids
+ * the clash.
+ *
+ * Return: 0 on success (including when @id was already reserved), %-EBUSY if
+ * @id is already allocated, or a negative errno if xa_store() fails.
+ */
+int amd_iommu_trans_devid_reserve(struct amd_iommu_pci_seg *pci_seg, u16 id)
+{
+	enum trans_devid_state state;
+	void *prev;
+	int ret = 0;
+
+	mutex_lock(&pci_seg->trans_devid_mutex);
+	state = trans_devid_xa_get_state(xa_load(&pci_seg->trans_devid_xa, id));
+	if (state == TRANS_DEVID_ALLOCATED) {
+		ret = -EBUSY;
+	} else if (state == TRANS_DEVID_FREE) {
+		/* Id is free: record the reservation. */
+		prev = xa_store(&pci_seg->trans_devid_xa, id,
+				trans_devid_xa_mk_state(TRANS_DEVID_RESERVED), GFP_KERNEL);
+		if (xa_is_err(prev))
+			ret = xa_err(prev);
+		else
+			WARN_ON_ONCE(prev);
+	}
+	/* TRANS_DEVID_RESERVED needs no update. */
+	mutex_unlock(&pci_seg->trans_devid_mutex);
+
+	if (!ret)
+		pr_debug("%s: Reserved trans_devid %#x (seg %#x)\n", __func__, id,
+			 pci_seg->id);
+
+	return ret;
+}
--
2.34.1