[PATCH v3 06/18] iommu: Implement device and IOMMU HW preservation
From: Samiullah Khawaja
Date: Sun Jun 14 2026 - 19:41:17 EST
Add IOMMU ops to preserve/unpreserve the device state handled by the
IOMMU driver. This is the IOMMU-specific state of the device that needs
to be preserved for liveupdate. These ops can be implemented by IOMMU
drivers that support preservation of devices that have their IOMMU
domains preserved. During device preservation, the state of the
associated IOMMU is also preserved as a dependency.
Signed-off-by: Samiullah Khawaja <skhawaja@xxxxxxxxxx>
---
drivers/iommu/iommu.c | 18 ++++
drivers/iommu/liveupdate.c | 172 +++++++++++++++++++++++++++++++
include/linux/iommu-liveupdate.h | 43 ++++++++
include/linux/iommu.h | 34 ++++++
4 files changed, 267 insertions(+)
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 61c12ba78206..41effee10e33 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -307,6 +307,24 @@ void iommu_device_unregister(struct iommu_device *iommu)
}
EXPORT_SYMBOL_GPL(iommu_device_unregister);
+/*
+ * bus_for_each_dev() callback: hand @dev to the iterator's function when the
+ * device's IOMMU is the one being walked; every other device yields 0.
+ */
+static int _iommu_for_each_dev_cb(struct device *dev, void *data)
+{
+	struct iommu_dev_iter *walk = data;
+
+	if (!dev->iommu || dev->iommu->iommu_dev != walk->iommu)
+		return 0;
+
+	return walk->fn(dev, walk->iommu, walk->arg);
+}
+
+/**
+ * iommu_for_each_dev - Call a function for every device behind an IOMMU
+ * @iter: the IOMMU to match, the function to call and its opaque argument
+ *
+ * Walks every bus in iommu_buses and calls @iter->fn for each device whose
+ * IOMMU is @iter->iommu. A non-zero return from @iter->fn ends the whole
+ * walk, not only the walk of the bus the device sits on. The value itself is
+ * not returned; a caller that needs it has to record it through @iter->arg.
+ */
+void iommu_for_each_dev(struct iommu_dev_iter *iter)
+{
+	for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) {
+		/*
+		 * bus_for_each_dev() stops at, and returns, the first non-zero
+		 * callback result. Honour that across buses as well so the
+		 * callback is not invoked again after it asked to stop.
+		 */
+		if (bus_for_each_dev(iommu_buses[i], NULL, iter,
+				     _iommu_for_each_dev_cb))
+			break;
+	}
+}
+EXPORT_SYMBOL_GPL(iommu_for_each_dev);
+
#if IS_ENABLED(CONFIG_IOMMUFD_TEST)
void iommu_device_unregister_bus(struct iommu_device *iommu,
const struct bus_type *bus,
diff --git a/drivers/iommu/liveupdate.c b/drivers/iommu/liveupdate.c
index 16a0f75a98e3..2d782023407d 100644
--- a/drivers/iommu/liveupdate.c
+++ b/drivers/iommu/liveupdate.c
@@ -13,6 +13,7 @@
#include <linux/iommu.h>
#include <linux/kexec_handover.h>
#include <linux/liveupdate.h>
+#include <linux/pci.h>
#define iommu_max_objs_per_page(_array) \
((PAGE_SIZE - sizeof(struct iommu_array_hdr_ser)) / sizeof((_array)->objects[0]))
@@ -314,3 +315,174 @@ void iommu_unpreserve_domain(struct iommu_domain *domain)
domain->preserved_state = NULL;
}
EXPORT_SYMBOL_GPL(iommu_unpreserve_domain);
+
+/*
+ * Obtain an object index from alloc_object_ser() for the current IOMMU array
+ * of @flb and start that object's reference count at one. Returns the object,
+ * or an ERR_PTR() carrying the negative value alloc_object_ser() returned.
+ */
+static struct iommu_hw_ser *alloc_iommu_hw_ser(struct iommu_flb_obj *flb)
+{
+	struct iommu_hw_ser *hw_ser;
+	int slot;
+
+	slot = alloc_object_ser((void **)&flb->curr_iommu_array,
+				iommu_max_objs_per_page(flb->curr_iommu_array));
+	if (slot < 0)
+		return ERR_PTR(slot);
+
+	hw_ser = &flb->curr_iommu_array->objects[slot];
+	hw_ser->hdr.ref_count = 1;
+	return hw_ser;
+}
+
+/*
+ * Take a reference on the preserved state of @iommu, serializing the IOMMU
+ * through the driver's ->preserve() op when no state is recorded yet. The
+ * caller must hold @flb_obj->lock.
+ *
+ * Returns 0 on success, -EOPNOTSUPP when the driver lacks the preserve or
+ * unpreserve op, or the error from the allocation or from ->preserve().
+ */
+static int iommu_preserve_locked(struct iommu_device *iommu,
+				 struct iommu_flb_obj *flb_obj)
+{
+	struct iommu_hw_ser *hw_ser;
+	int err;
+
+	if (!iommu->ops->preserve || !iommu->ops->unpreserve)
+		return -EOPNOTSUPP;
+
+	lockdep_assert_held(&flb_obj->lock);
+
+	/* Already preserved: only account for the additional user. */
+	hw_ser = iommu->outgoing_preserved_state;
+	if (hw_ser) {
+		hw_ser->hdr.ref_count++;
+		return 0;
+	}
+
+	hw_ser = alloc_iommu_hw_ser(flb_obj);
+	if (IS_ERR(hw_ser))
+		return PTR_ERR(hw_ser);
+
+	err = iommu->ops->preserve(iommu, hw_ser);
+	if (err) {
+		/* Flag the freshly allocated object as deleted on failure. */
+		hw_ser->hdr.flags |= IOMMU_SER_FLAG_DELETED;
+		return err;
+	}
+
+	iommu->outgoing_preserved_state = hw_ser;
+	return 0;
+}
+
+/*
+ * Drop one reference on the preserved state of @iommu. Once the count reaches
+ * zero the driver's ->unpreserve() op is called and the serialized object is
+ * flagged as deleted. The caller must hold @flb_obj->lock.
+ */
+static void iommu_unpreserve_locked(struct iommu_device *iommu,
+				    struct iommu_flb_obj *flb_obj)
+{
+	struct iommu_hw_ser *hw_ser = iommu->outgoing_preserved_state;
+
+	lockdep_assert_held(&flb_obj->lock);
+
+	if (WARN_ON(!hw_ser))
+		return;
+
+	if (WARN_ON_ONCE(!iommu->ops->preserve || !iommu->ops->unpreserve))
+		return;
+
+	if (--hw_ser->hdr.ref_count)
+		return;
+
+	/* Last reference: detach the state before the driver tears it down. */
+	iommu->outgoing_preserved_state = NULL;
+	iommu->ops->unpreserve(iommu, hw_ser);
+	hw_ser->hdr.flags |= IOMMU_SER_FLAG_DELETED;
+}
+
+/*
+ * Obtain an object index from alloc_object_ser() for the current device array
+ * of @flb and start that object's reference count at one. Returns the object,
+ * or an ERR_PTR() carrying the negative value alloc_object_ser() returned.
+ */
+static struct iommu_device_ser *alloc_iommu_device_ser(struct iommu_flb_obj *flb)
+{
+	struct iommu_device_ser *dev_ser;
+	int slot;
+
+	slot = alloc_object_ser((void **)&flb->curr_device_array,
+				iommu_max_objs_per_page(flb->curr_device_array));
+	if (slot < 0)
+		return ERR_PTR(slot);
+
+	dev_ser = &flb->curr_device_array->objects[slot];
+	dev_ser->hdr.ref_count = 1;
+	return dev_ser;
+}
+
+/**
+ * iommu_preserve_device - Preserve the IOMMU state of a device for liveupdate
+ * @domain: preserved domain whose serialized state is recorded for the device
+ * @dev: device to preserve; only PCI devices are accepted
+ * @preserved_state: on success, set to the physical address of the
+ *                   serialized device state
+ *
+ * Allocates a serialized device object, preserves the device's IOMMU (taking
+ * a reference if it is already preserved), fills in the domain, IOMMU and
+ * PCI identification and finally calls the driver's ->preserve_device() op.
+ * On any failure the serialized object is flagged as deleted and the IOMMU
+ * reference is dropped again.
+ *
+ * Return: 0 on success; -EOPNOTSUPP if @dev is not a PCI device or the IOMMU
+ * driver lacks one of the preserve/unpreserve ops; -ENODEV if @dev has no
+ * IOMMU data; -EINVAL if the group's DMA owner is not claimed or @domain is
+ * not preserved; -EBUSY if @dev already has outgoing preserved state; or the
+ * error returned by liveupdate_flb_get_outgoing(), the allocation, the IOMMU
+ * preservation or the driver op.
+ */
+int iommu_preserve_device(struct iommu_domain *domain,
+			  struct device *dev, u64 *preserved_state)
+{
+	struct iommu_flb_obj *flb_obj;
+	struct iommu_device_ser *device_ser;
+	struct dev_iommu *iommu;
+	struct pci_dev *pdev;
+	int ret;
+
+	if (!dev_is_pci(dev))
+		return -EOPNOTSUPP;
+
+	/* dev->iommu is dereferenced below; bail out if the device has none. */
+	if (!dev->iommu)
+		return -ENODEV;
+
+	if (!iommu_group_dma_owner_claimed(dev->iommu_group))
+		return -EINVAL;
+
+	pdev = to_pci_dev(dev);
+	iommu = dev->iommu;
+	if (!iommu->iommu_dev->ops->preserve_device ||
+	    !iommu->iommu_dev->ops->unpreserve_device ||
+	    !iommu->iommu_dev->ops->preserve ||
+	    !iommu->iommu_dev->ops->unpreserve)
+		return -EOPNOTSUPP;
+
+	ret = liveupdate_flb_get_outgoing(&iommu_flb, (void **)&flb_obj);
+	if (ret)
+		return ret;
+
+	guard(mutex)(&flb_obj->lock);
+	if (!domain->preserved_state)
+		return -EINVAL;
+
+	/*
+	 * Preserving twice would overwrite dev->iommu->device_ser, losing the
+	 * earlier object and the IOMMU reference taken for it.
+	 */
+	if (dev_iommu_preserved_state(dev))
+		return -EBUSY;
+
+	device_ser = alloc_iommu_device_ser(flb_obj);
+	if (IS_ERR(device_ser))
+		return PTR_ERR(device_ser);
+
+	ret = iommu_preserve_locked(iommu->iommu_dev, flb_obj);
+	if (ret) {
+		device_ser->hdr.flags |= IOMMU_SER_FLAG_DELETED;
+		return ret;
+	}
+
+	device_ser->domain_iommu_ser.domain_phys = virt_to_phys(domain->preserved_state);
+	device_ser->domain_iommu_ser.iommu_phys = virt_to_phys(iommu->iommu_dev->outgoing_preserved_state);
+	device_ser->devid = pci_dev_id(pdev);
+	device_ser->pci_domain_nr = pci_domain_nr(pdev->bus);
+
+	ret = iommu->iommu_dev->ops->preserve_device(dev, device_ser);
+	if (ret) {
+		/* Undo in reverse order: drop the object, then the IOMMU ref. */
+		device_ser->hdr.flags |= IOMMU_SER_FLAG_DELETED;
+		iommu_unpreserve_locked(iommu->iommu_dev, flb_obj);
+		return ret;
+	}
+
+	dev->iommu->device_ser = device_ser;
+	*preserved_state = virt_to_phys(device_ser);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(iommu_preserve_device);
+
+/**
+ * iommu_unpreserve_device - Undo iommu_preserve_device() for a device
+ * @domain: not used by this function
+ * @dev: device whose preserved IOMMU state is released
+ *
+ * Flags the device's serialized object as deleted, calls the driver's
+ * ->unpreserve_device() op, clears dev->iommu->device_ser and drops the
+ * reference on the preserved IOMMU state. Returns silently for non-PCI
+ * devices, devices without IOMMU data and groups whose DMA owner is not
+ * claimed; warns if the driver ops, the outgoing FLB object or the device's
+ * outgoing state are missing.
+ */
+void iommu_unpreserve_device(struct iommu_domain *domain, struct device *dev)
+{
+	struct iommu_flb_obj *flb_obj;
+	struct iommu_device_ser *device_ser;
+	struct dev_iommu *iommu;
+	int ret;
+
+	if (!dev_is_pci(dev))
+		return;
+
+	/* dev->iommu is dereferenced below; bail out if the device has none. */
+	if (!dev->iommu)
+		return;
+
+	if (!iommu_group_dma_owner_claimed(dev->iommu_group))
+		return;
+
+	iommu = dev->iommu;
+	if (WARN_ON(!iommu->iommu_dev->ops->unpreserve_device ||
+		    !iommu->iommu_dev->ops->unpreserve))
+		return;
+
+	ret = liveupdate_flb_get_outgoing(&iommu_flb, (void **)&flb_obj);
+	if (WARN_ON(ret))
+		return;
+
+	guard(mutex)(&flb_obj->lock);
+	device_ser = dev_iommu_preserved_state(dev);
+	if (WARN_ON(!device_ser))
+		return;
+
+	/* device_ser is dev->iommu->device_ser, as returned by the helper. */
+	device_ser->hdr.flags |= IOMMU_SER_FLAG_DELETED;
+	iommu->iommu_dev->ops->unpreserve_device(dev, device_ser);
+	iommu->device_ser = NULL;
+
+	iommu_unpreserve_locked(iommu->iommu_dev, flb_obj);
+}
+EXPORT_SYMBOL_GPL(iommu_unpreserve_device);
diff --git a/include/linux/iommu-liveupdate.h b/include/linux/iommu-liveupdate.h
index 0204e17996b0..f9528c94979d 100644
--- a/include/linux/iommu-liveupdate.h
+++ b/include/linux/iommu-liveupdate.h
@@ -8,14 +8,42 @@
#ifndef _LINUX_IOMMU_LIVEUPDATE_H
#define _LINUX_IOMMU_LIVEUPDATE_H
+#include <linux/device.h>
#include <linux/iommu.h>
#include <linux/liveupdate.h>
#include <linux/kho/abi/iommu.h>
#ifdef CONFIG_IOMMU_LIVEUPDATE
+/*
+ * Return the serialized state attached to @dev, or NULL when the device has
+ * no IOMMU data, no serialized state, or state flagged IOMMU_SER_FLAG_INCOMING.
+ */
+static inline void *dev_iommu_preserved_state(struct device *dev)
+{
+	struct iommu_device_ser *state;
+
+	state = dev->iommu ? dev->iommu->device_ser : NULL;
+	if (!state || (state->hdr.flags & IOMMU_SER_FLAG_INCOMING))
+		return NULL;
+
+	return state;
+}
+
int iommu_preserve_domain(struct iommu_domain *domain, struct iommu_domain_ser **ser);
void iommu_unpreserve_domain(struct iommu_domain *domain);
+int iommu_preserve_device(struct iommu_domain *domain,
+ struct device *dev, u64 *preserved_state);
+void iommu_unpreserve_device(struct iommu_domain *domain, struct device *dev);
+
+static inline void *iommu_preserved_state(struct iommu_device *iommu)
+{
+ return iommu->outgoing_preserved_state; /* returned as-is, may be NULL */
+}
#else
+static inline void *dev_iommu_preserved_state(struct device *dev)
+{
+ return NULL; /* stub: built without CONFIG_IOMMU_LIVEUPDATE */
+}
+
static inline int iommu_preserve_domain(struct iommu_domain *domain, struct iommu_domain_ser **ser)
{
return -EOPNOTSUPP;
@@ -24,6 +52,21 @@ static inline int iommu_preserve_domain(struct iommu_domain *domain, struct iomm
static inline void iommu_unpreserve_domain(struct iommu_domain *domain)
{ /* stub: built without CONFIG_IOMMU_LIVEUPDATE, does nothing */
}
+
+static inline int iommu_preserve_device(struct iommu_domain *domain,
+ struct device *dev, u64 *preserved_state)
+{
+ return -EOPNOTSUPP; /* stub: built without CONFIG_IOMMU_LIVEUPDATE */
+}
+
+static inline void iommu_unpreserve_device(struct iommu_domain *domain, struct device *dev)
+{ /* stub: built without CONFIG_IOMMU_LIVEUPDATE, does nothing */
+}
+
+static inline void *iommu_preserved_state(struct iommu_device *iommu)
+{
+ return NULL; /* stub: built without CONFIG_IOMMU_LIVEUPDATE */
+}
#endif
int iommu_liveupdate_register_flb(struct liveupdate_file_handler *handler);
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 0bea1a57f4be..b2f614367074 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -655,6 +655,10 @@ __iommu_copy_struct_to_user(const struct iommu_user_data *dst_data,
* resources shared/passed to user space IOMMU instance. Associate
* it with a nesting @parent_domain. It is required for driver to
* set @viommu->ops pointing to its own viommu_ops
+ * @preserve_device: Preserve state of a device for liveupdate.
+ * @unpreserve_device: Unpreserve state that was preserved earlier.
+ * @preserve: Preserve state of iommu translation hardware for liveupdate.
+ * @unpreserve: Unpreserve state of iommu that was preserved earlier.
* @owner: Driver module providing these ops
* @identity_domain: An always available, always attachable identity
* translation.
@@ -711,6 +715,13 @@ struct iommu_ops {
struct iommu_domain *parent_domain,
const struct iommu_user_data *user_data);
+#ifdef CONFIG_IOMMU_LIVEUPDATE
+ int (*preserve_device)(struct device *dev, struct iommu_device_ser *device_ser);
+ void (*unpreserve_device)(struct device *dev, struct iommu_device_ser *device_ser);
+ int (*preserve)(struct iommu_device *iommu, struct iommu_hw_ser *iommu_ser);
+ void (*unpreserve)(struct iommu_device *iommu, struct iommu_hw_ser *iommu_ser);
+#endif
+
const struct iommu_domain_ops *default_domain_ops;
struct module *owner;
struct iommu_domain *identity_domain;
@@ -798,6 +809,8 @@ struct iommu_domain_ops {
* @singleton_group: Used internally for drivers that have only one group
* @max_pasids: number of supported PASIDs
* @ready: set once iommu_device_register() has completed successfully
+ * @outgoing_preserved_state: preserved iommu state of outgoing kernel for
+ * liveupdate.
*/
struct iommu_device {
struct list_head list;
@@ -807,6 +820,10 @@ struct iommu_device {
struct iommu_group *singleton_group;
u32 max_pasids;
bool ready;
+
+#ifdef CONFIG_IOMMU_LIVEUPDATE
+ struct iommu_hw_ser *outgoing_preserved_state;
+#endif
};
/**
@@ -861,6 +878,9 @@ struct dev_iommu {
u32 pci_32bit_workaround:1;
u32 require_direct:1;
u32 shadow_on_flush:1;
+#ifdef CONFIG_IOMMU_LIVEUPDATE
+ struct iommu_device_ser *device_ser;
+#endif
};
int iommu_device_register(struct iommu_device *iommu,
@@ -1177,6 +1197,20 @@ static inline void *dev_iommu_priv_get(struct device *dev)
void dev_iommu_priv_set(struct device *dev, void *priv);
+typedef int (*iommu_dev_iter_fn)(struct device *dev,
+ struct iommu_device *iommu, void *arg);
+
+/**
+ * struct iommu_dev_iter - Iterator for devices attached to an IOMMU
+ * @iommu: IOMMU whose devices are visited; a device matches when its
+ *         dev->iommu->iommu_dev equals this pointer
+ * @fn: function called for each matching device
+ * @arg: opaque pointer handed to @fn as its last argument
+ */
+struct iommu_dev_iter {
+ struct iommu_device *iommu;
+ iommu_dev_iter_fn fn;
+ void *arg;
+};
+
+void iommu_for_each_dev(struct iommu_dev_iter *iter);
+
extern struct mutex iommu_probe_device_lock;
int iommu_probe_device(struct device *dev);
--
2.54.0.1136.gdb2ca164c4-goog