Re: [PATCH v3 07/18] iommu/vt-d: Implement device and iommu preserve/unpreserve ops
From: Samiullah Khawaja
Date: Mon Jun 22 2026 - 15:19:27 EST
On Mon, Jun 22, 2026 at 09:50:36AM +0800, Baolu Lu wrote:
On 6/15/26 07:37, Samiullah Khawaja wrote:
Add implementation of the device and iommu presevation in a separate
file. Also set the device and iommu preserve/unpreserve ops in the
struct iommu_ops.
Signed-off-by: Samiullah Khawaja <skhawaja@xxxxxxxxxx>
---
MAINTAINERS | 8 ++
drivers/iommu/intel/Makefile | 1 +
drivers/iommu/intel/iommu.c | 8 +-
drivers/iommu/intel/iommu.h | 28 +++++
drivers/iommu/intel/liveupdate.c | 175 +++++++++++++++++++++++++++++++
include/linux/kho/abi/iommu.h | 21 ++++
6 files changed, 239 insertions(+), 2 deletions(-)
create mode 100644 drivers/iommu/intel/liveupdate.c
[snip]
-
/*
* Take a root_entry and return the Lower Context Table Pointer (LCTP)
* if marked present.
@@ -3926,6 +3925,11 @@ const struct iommu_ops intel_iommu_ops = {
.is_attach_deferred = intel_iommu_is_attach_deferred,
.def_domain_type = device_def_domain_type,
.page_response = intel_iommu_page_response,
+#ifdef CONFIG_IOMMU_LIVEUPDATE
+ .preserve_device = intel_iommu_preserve_device,
+ .preserve = intel_iommu_preserve,
+ .unpreserve = intel_iommu_unpreserve,
+#endif
Any reason why an unpreserve_device callback is missing here?
I added unpreserve_device in a later patch when PASID support is added,
as the context tables are currently unpreserved globally during
unpreserve(iommu).
But I agree this looks incomplete and I will add a stub
unpreserve_device in this patch.
};
static void quirk_iommu_igfx(struct pci_dev *dev)
[snip]
+
+static void unpreserve_context_table(struct intel_iommu *iommu,
+ struct iommu_hw_ser *ser,
+ u8 bus, u8 devfn)
+{
+ struct context_entry *context;
+
+ spin_lock(&iommu->lock);
+ context = iommu_context_addr(iommu, bus, devfn, 0);
+ spin_unlock(&iommu->lock);
The spinlock is dropped immediately after reading the address pointer.
If this is guaranteed to be safe, please add a comment to explain why a
UAF or race is avoided here. Otherwise, the locking scope needs to be
extened to protect both the pointer lookup and use.
In the Intel VT-d driver, context tables are never freed once they are
allocated during runtime, as they can be shared across multiple devices.
So I took the lock here to protect against concurrent allocations inside
iommu_context_addr(). Once the address is read, it is safe to use
without holding the lock until the DMAR unit itself is torn down.
I will add a comment explaining this here.
+ if (context && is_context_table_preserved(iommu, ser, bus, devfn)) {
+ iommu_unpreserve_pages(context);
+ clear_bit(CONTEXT_TABLE_PRESERVED_BIT(bus, devfn),
+ (unsigned long *)&ser->intel.context_tables_bitmap[0]);
+ }
+}
+
+static int preserve_context_table(struct intel_iommu *iommu,
+ struct iommu_hw_ser *ser,
+ u8 bus, u8 devfn)
+{
+ struct context_entry *context;
+ int ret;
+
+ spin_lock(&iommu->lock);
+ context = iommu_context_addr(iommu, bus, devfn, 0);
+ spin_unlock(&iommu->lock);
Ditto.
Answered above.
+ if (context && !is_context_table_preserved(iommu, ser, bus, devfn)) {
+ ret = iommu_preserve_pages(context);
+ if (ret)
+ return ret;
+
+ set_bit(CONTEXT_TABLE_PRESERVED_BIT(bus, devfn),
+ (unsigned long *)&ser->intel.context_tables_bitmap[0]);
+ }
+
+ return 0;
+}
+
[snip]
diff --git a/include/linux/kho/abi/iommu.h b/include/linux/kho/abi/iommu.h
index d06f251a2df4..ad760c497e13 100644
--- a/include/linux/kho/abi/iommu.h
+++ b/include/linux/kho/abi/iommu.h
@@ -80,6 +80,7 @@
*/
enum iommu_type_ser {
IOMMU_INVALID,
+ IOMMU_INTEL,
};
#define IOMMU_SER_FLAG_DELETED (1 << 0)
@@ -140,16 +141,36 @@ struct iommu_device_ser {
struct iommu_dev_map_ser domain_iommu_ser;
} __packed;
+/**
+ * struct iommu_intel_ser - Serialized state of an Intel IOMMU instance
+ * @restored: Whether IOMMU state is restored
+ * @phys_addr: Physical address of the IOMMU register base
+ * @root_table: Physical address of the root entry table
+ * @context_tables_bitmap: Bitmap representing the context tables that are
+ * preserved.
+ */
+struct iommu_intel_ser {
+ u8 restored;
+ u8 padding[7];
+ u64 phys_addr;
+ u64 root_table;
+ u64 context_tables_bitmap[8]; /* Tracks upto 512 context tables */
To avoid open-coded magic numbers, how about something like,
#define VTD_PRESERVED_BITMAP_LONGS DIV_ROUND_UP(512, BITS_PER_LONG_LONG)
u64 context_tables_bitmap[VTD_PRESERVED_BITMAP_LONGS];
?
This looks great to me. I will update this here.
+};
+
/**
* struct iommu_hw_ser - Serialized state of an IOMMU instance
* @hdr: Common object header
* @token: Unique token for the IOMMU
* @type: IOMMU type serialized state belongs to
+ * @intel: Intel specific serialization data
*/
struct iommu_hw_ser {
struct iommu_hdr_ser hdr;
u64 token;
u64 type;
+ union {
+ struct iommu_intel_ser intel;
+ };
} __packed;
/**
Thanks,
baolu
Thanks,
Sami