[PATCH v3 07/18] iommu/vt-d: Implement device and iommu preserve/unpreserve ops

From: Samiullah Khawaja

Date: Sun Jun 14 2026 - 19:41:33 EST


Add implementation of the device and iommu presevation in a separate
file. Also set the device and iommu preserve/unpreserve ops in the
struct iommu_ops.

Signed-off-by: Samiullah Khawaja <skhawaja@xxxxxxxxxx>
---
MAINTAINERS | 8 ++
drivers/iommu/intel/Makefile | 1 +
drivers/iommu/intel/iommu.c | 8 +-
drivers/iommu/intel/iommu.h | 28 +++++
drivers/iommu/intel/liveupdate.c | 175 +++++++++++++++++++++++++++++++
include/linux/kho/abi/iommu.h | 21 ++++
6 files changed, 239 insertions(+), 2 deletions(-)
create mode 100644 drivers/iommu/intel/liveupdate.c

diff --git a/MAINTAINERS b/MAINTAINERS
index cef90bb50781..4dfb56962426 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13002,6 +13002,14 @@ S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux.git
F: drivers/iommu/intel/

+INTEL IOMMU LIVEUPDATE (VT-d)
+M: Samiullah Khawaja <skhawaja@xxxxxxxxxx>
+M: Lu Baolu <baolu.lu@xxxxxxxxxxxxxxx>
+L: iommu@xxxxxxxxxxxxxxx
+S: Maintained
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux.git
+F: drivers/iommu/intel/liveupdate.c
+
INTEL IPU3 CSI-2 CIO2 DRIVER
M: Yong Zhi <yong.zhi@xxxxxxxxx>
M: Sakari Ailus <sakari.ailus@xxxxxxxxxxxxxxx>
diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile
index ada651c4a01b..d38fc101bc35 100644
--- a/drivers/iommu/intel/Makefile
+++ b/drivers/iommu/intel/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o
obj-$(CONFIG_INTEL_IOMMU_SVM) += svm.o
obj-$(CONFIG_IRQ_REMAP) += irq_remapping.o
obj-$(CONFIG_INTEL_IOMMU_PERF_EVENTS) += perfmon.o
+obj-$(CONFIG_IOMMU_LIVEUPDATE) += liveupdate.o
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index c3d18cd77d2f..715b538e7efe 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -16,6 +16,7 @@
#include <linux/crash_dump.h>
#include <linux/dma-direct.h>
#include <linux/dmi.h>
+#include <linux/iommu-liveupdate.h>
#include <linux/memory.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
@@ -60,8 +61,6 @@ static int force_on = 0;
static int intel_iommu_tboot_noforce;
static int no_platform_optin;

-#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
-
/*
* Take a root_entry and return the Lower Context Table Pointer (LCTP)
* if marked present.
@@ -3926,6 +3925,11 @@ const struct iommu_ops intel_iommu_ops = {
.is_attach_deferred = intel_iommu_is_attach_deferred,
.def_domain_type = device_def_domain_type,
.page_response = intel_iommu_page_response,
+#ifdef CONFIG_IOMMU_LIVEUPDATE
+ .preserve_device = intel_iommu_preserve_device,
+ .preserve = intel_iommu_preserve,
+ .unpreserve = intel_iommu_unpreserve,
+#endif
};

static void quirk_iommu_igfx(struct pci_dev *dev)
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index ef145560aa98..5e0bc17e76bf 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -552,6 +552,8 @@ struct root_entry {
u64 hi;
};

+#define ROOT_ENTRY_NR (VTD_PAGE_SIZE / sizeof(struct root_entry))
+
/*
* low 64 bits:
* 0: present
@@ -1284,6 +1286,32 @@ static inline int iopf_for_domain_replace(struct iommu_domain *new,
return 0;
}

+#ifdef CONFIG_IOMMU_LIVEUPDATE
+int intel_iommu_preserve_device(struct device *dev,
+ struct iommu_device_ser *device_ser);
+int intel_iommu_preserve(struct iommu_device *iommu,
+ struct iommu_hw_ser *iommu_ser);
+void intel_iommu_unpreserve(struct iommu_device *iommu,
+ struct iommu_hw_ser *iommu_ser);
+#else
+static inline int intel_iommu_preserve_device(struct device *dev,
+ struct iommu_device_ser *device_ser)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int intel_iommu_preserve(struct iommu_device *iommu,
+ struct iommu_hw_ser *iommu_ser)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void intel_iommu_unpreserve(struct iommu_device *iommu,
+ struct iommu_hw_ser *iommu_ser)
+{
+}
+#endif
+
#ifdef CONFIG_INTEL_IOMMU_SVM
void intel_svm_check(struct intel_iommu *iommu);
struct iommu_domain *intel_svm_domain_alloc(struct device *dev,
diff --git a/drivers/iommu/intel/liveupdate.c b/drivers/iommu/intel/liveupdate.c
new file mode 100644
index 000000000000..b5613e4ad43a
--- /dev/null
+++ b/drivers/iommu/intel/liveupdate.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright (C) 2026, Google LLC
+ * Author: Samiullah Khawaja <skhawaja@xxxxxxxxxx>
+ */
+
+#define pr_fmt(fmt) "DMAR: liveupdate: " fmt
+
+#include <linux/kexec_handover.h>
+#include <linux/liveupdate.h>
+#include <linux/iommu-liveupdate.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "iommu.h"
+#include "../iommu-pages.h"
+
+/* 2 tables per bus in scalable mode with upper table at odd bit */
+#define CONTEXT_TABLE_PRESERVED_BIT(bus, devfn) ((bus << 1) + (devfn >> 7))
+static bool is_context_table_preserved(struct intel_iommu *iommu,
+ struct iommu_hw_ser *ser,
+ u8 bus, u8 devfn)
+{
+ return test_bit(CONTEXT_TABLE_PRESERVED_BIT(bus, devfn),
+ (unsigned long *)&ser->intel.context_tables_bitmap[0]);
+}
+
+static void unpreserve_context_table(struct intel_iommu *iommu,
+ struct iommu_hw_ser *ser,
+ u8 bus, u8 devfn)
+{
+ struct context_entry *context;
+
+ spin_lock(&iommu->lock);
+ context = iommu_context_addr(iommu, bus, devfn, 0);
+ spin_unlock(&iommu->lock);
+ if (context && is_context_table_preserved(iommu, ser, bus, devfn)) {
+ iommu_unpreserve_pages(context);
+ clear_bit(CONTEXT_TABLE_PRESERVED_BIT(bus, devfn),
+ (unsigned long *)&ser->intel.context_tables_bitmap[0]);
+ }
+}
+
+static int preserve_context_table(struct intel_iommu *iommu,
+ struct iommu_hw_ser *ser,
+ u8 bus, u8 devfn)
+{
+ struct context_entry *context;
+ int ret;
+
+ spin_lock(&iommu->lock);
+ context = iommu_context_addr(iommu, bus, devfn, 0);
+ spin_unlock(&iommu->lock);
+ if (context && !is_context_table_preserved(iommu, ser, bus, devfn)) {
+ ret = iommu_preserve_pages(context);
+ if (ret)
+ return ret;
+
+ set_bit(CONTEXT_TABLE_PRESERVED_BIT(bus, devfn),
+ (unsigned long *)&ser->intel.context_tables_bitmap[0]);
+ }
+
+ return 0;
+}
+
+static void unpreserve_iommu_context_tables(struct intel_iommu *iommu,
+ struct iommu_hw_ser *ser)
+{
+ int i;
+
+ for (i = 0; i < ROOT_ENTRY_NR; i++) {
+ unpreserve_context_table(iommu, ser, i, 0);
+
+ if (!sm_supported(iommu))
+ continue;
+
+ unpreserve_context_table(iommu, ser, i, 0x80);
+ }
+}
+
+static int preserve_iommu_context_tables(struct device_domain_info *info)
+{
+ struct iommu_hw_ser *iommu_ser;
+ struct intel_iommu *iommu;
+ int ret;
+ int i;
+
+ /* IOMMU for this device should already preserved.*/
+ iommu = info->iommu;
+ iommu_ser = iommu_preserved_state(&iommu->iommu);
+ if (!iommu_ser)
+ return -EINVAL;
+
+ /*
+ * We could do preservation of context tables only for the bus of this
+ * device, but these devices can have PCI aliases, so context tables for
+ * those will also require preservation. Also unpreserve would require
+ * some kind of refcounting where the context table will only be
+ * unpreserved when the last device associated with it is unpreserved.
+ *
+ * This introduces unnecessary complication with minimum benefits as the
+ * unpreserved context tables will probably be recreated by the next
+ * kernel as these are all active devices. We follow simpler approach by
+ * just preserving the currently active context tables.
+ */
+ for (i = 0; i < ROOT_ENTRY_NR; i++) {
+ ret = preserve_context_table(iommu, iommu_ser, i, 0);
+ if (ret)
+ return ret;
+
+ if (!sm_supported(iommu))
+ continue;
+
+ ret = preserve_context_table(iommu, iommu_ser, i, 0x80);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+int intel_iommu_preserve_device(struct device *dev,
+ struct iommu_device_ser *device_ser)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ int ret;
+
+ if (!dev_is_pci(dev)) {
+ dev_err(dev, "Cannot preserve non-PCI device\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (!info || !info->domain)
+ return -EINVAL;
+
+ ret = preserve_iommu_context_tables(info);
+ if (ret)
+ return ret;
+
+ device_ser->domain_iommu_ser.attachment_id = domain_id_iommu(info->domain,
+ info->iommu);
+ return 0;
+}
+
+int intel_iommu_preserve(struct iommu_device *iommu_dev,
+ struct iommu_hw_ser *ser)
+{
+ struct intel_iommu *iommu;
+ int ret;
+
+ iommu = container_of(iommu_dev, struct intel_iommu, iommu);
+
+ ret = iommu_preserve_pages(iommu->root_entry);
+ if (ret)
+ return ret;
+
+ ser->intel.phys_addr = iommu->reg_phys;
+ ser->intel.root_table = __pa(iommu->root_entry);
+ ser->type = IOMMU_INTEL;
+ ser->token = ser->intel.phys_addr;
+
+ return 0;
+}
+
+void intel_iommu_unpreserve(struct iommu_device *iommu_dev,
+ struct iommu_hw_ser *ser)
+{
+ struct intel_iommu *iommu;
+
+ iommu = container_of(iommu_dev, struct intel_iommu, iommu);
+
+ unpreserve_iommu_context_tables(iommu, ser);
+ iommu_unpreserve_pages(iommu->root_entry);
+}
diff --git a/include/linux/kho/abi/iommu.h b/include/linux/kho/abi/iommu.h
index d06f251a2df4..ad760c497e13 100644
--- a/include/linux/kho/abi/iommu.h
+++ b/include/linux/kho/abi/iommu.h
@@ -80,6 +80,7 @@
*/
enum iommu_type_ser {
IOMMU_INVALID,
+ IOMMU_INTEL,
};

#define IOMMU_SER_FLAG_DELETED (1 << 0)
@@ -140,16 +141,36 @@ struct iommu_device_ser {
struct iommu_dev_map_ser domain_iommu_ser;
} __packed;

+/**
+ * struct iommu_intel_ser - Serialized state of an Intel IOMMU instance
+ * @restored: Whether IOMMU state is restored
+ * @phys_addr: Physical address of the IOMMU register base
+ * @root_table: Physical address of the root entry table
+ * @context_tables_bitmap: Bitmap representing the context tables that are
+ * preserved.
+ */
+struct iommu_intel_ser {
+ u8 restored;
+ u8 padding[7];
+ u64 phys_addr;
+ u64 root_table;
+ u64 context_tables_bitmap[8]; /* Tracks upto 512 context tables */
+};
+
/**
* struct iommu_hw_ser - Serialized state of an IOMMU instance
* @hdr: Common object header
* @token: Unique token for the IOMMU
* @type: IOMMU type serialized state belongs to
+ * @intel: Intel specific serialization data
*/
struct iommu_hw_ser {
struct iommu_hdr_ser hdr;
u64 token;
u64 type;
+ union {
+ struct iommu_intel_ser intel;
+ };
} __packed;

/**
--
2.54.0.1136.gdb2ca164c4-goog