[PATCH v2 04/16] iommu: Implement device and IOMMU HW preservation
From: Samiullah Khawaja
Date: Mon Apr 27 2026 - 14:00:14 EST
Add IOMMU ops to preserve/unpreserve a device. These can be implemented
by IOMMU drivers that support preserving devices whose IOMMU domains
have been preserved. During device preservation, the state of the
associated IOMMU is also preserved as a dependency.
Signed-off-by: Samiullah Khawaja <skhawaja@xxxxxxxxxx>
---
drivers/iommu/liveupdate.c | 162 +++++++++++++++++++++++++++++++
include/linux/iommu-liveupdate.h | 33 +++++++
include/linux/iommu.h | 20 ++++
3 files changed, 215 insertions(+)
diff --git a/drivers/iommu/liveupdate.c b/drivers/iommu/liveupdate.c
index f71f14518248..765d042e22e3 100644
--- a/drivers/iommu/liveupdate.c
+++ b/drivers/iommu/liveupdate.c
@@ -11,6 +11,7 @@
#include <linux/liveupdate.h>
#include <linux/iommu-liveupdate.h>
#include <linux/iommu.h>
+#include <linux/pci.h>
#include <linux/errno.h>
#define iommu_max_objs_per_page(_array) \
@@ -293,3 +294,164 @@ void iommu_domain_unpreserve(struct iommu_domain *domain)
domain->preserved_state = NULL;
}
EXPORT_SYMBOL_GPL(iommu_domain_unpreserve);
+
+/*
+ * Reserve a slot for IOMMU hardware state in the current IOMMU array of
+ * @flb. The slot is handed back with its reference count set to one.
+ *
+ * Returns the slot, or an ERR_PTR() wrapping the negative value reported by
+ * alloc_object_ser().
+ */
+static struct iommu_hw_ser *alloc_iommu_hw_ser(struct iommu_flb_obj *flb)
+{
+	struct iommu_hw_ser *slot;
+	int slot_idx;
+
+	slot_idx = alloc_object_ser((struct iommu_array_hdr_ser **)&flb->curr_iommu_array,
+				    iommu_max_objs_per_page(flb->curr_iommu_array));
+	if (slot_idx < 0)
+		return ERR_PTR(slot_idx);
+
+	slot = &flb->curr_iommu_array->objects[slot_idx];
+	slot->hdr.ref_count = 1;
+	return slot;
+}
+
+/*
+ * Take a reference on the preserved hardware state of @iommu, creating that
+ * state through the driver's ->preserve() op if none exists yet.
+ *
+ * The caller must hold flb_obj->lock. Returns 0 on success, -EOPNOTSUPP when
+ * the driver has no ->preserve() op, or the error from slot allocation or
+ * from ->preserve().
+ */
+static int iommu_preserve_locked(struct iommu_device *iommu,
+				 struct iommu_flb_obj *flb_obj)
+{
+	struct iommu_hw_ser *hw_state;
+	int err;
+
+	if (!iommu->ops->preserve)
+		return -EOPNOTSUPP;
+
+	lockdep_assert_held(&flb_obj->lock);
+
+	/* State already preserved: only bump the reference count. */
+	hw_state = iommu->outgoing_preserved_state;
+	if (hw_state) {
+		hw_state->hdr.ref_count++;
+		return 0;
+	}
+
+	hw_state = alloc_iommu_hw_ser(flb_obj);
+	if (IS_ERR(hw_state))
+		return PTR_ERR(hw_state);
+
+	err = iommu->ops->preserve(iommu, hw_state);
+	if (err) {
+		/* The driver refused: flag the fresh slot as deleted. */
+		hw_state->hdr.deleted = true;
+		return err;
+	}
+
+	iommu->outgoing_preserved_state = hw_state;
+	return 0;
+}
+
+/*
+ * Drop one reference on the preserved hardware state of @iommu. When the
+ * last reference goes away, the driver's ->unpreserve() op is invoked (if
+ * provided) and the serialized slot is marked deleted.
+ *
+ * The caller must hold flb_obj->lock.
+ */
+static void iommu_unpreserve_locked(struct iommu_device *iommu,
+				    struct iommu_flb_obj *flb_obj)
+{
+	struct iommu_hw_ser *iommu_hw_ser = iommu->outgoing_preserved_state;
+
+	lockdep_assert_held(&flb_obj->lock);
+
+	/* Unbalanced call: there is no preserved state to drop. */
+	if (WARN_ON(!iommu_hw_ser))
+		return;
+
+	iommu_hw_ser->hdr.ref_count--;
+	if (iommu_hw_ser->hdr.ref_count)
+		return;
+
+	iommu->outgoing_preserved_state = NULL;
+	/*
+	 * This helper does not itself verify that the driver implements
+	 * ->unpreserve(); skip the callback rather than call a NULL pointer.
+	 */
+	if (iommu->ops->unpreserve)
+		iommu->ops->unpreserve(iommu, iommu_hw_ser);
+	iommu_hw_ser->hdr.deleted = true;
+}
+
+/*
+ * Reserve a slot for device state in the current device array of @flb. The
+ * slot is handed back with its reference count set to one.
+ *
+ * Returns the slot, or an ERR_PTR() wrapping the negative value reported by
+ * alloc_object_ser().
+ */
+static struct iommu_device_ser *alloc_iommu_device_ser(struct iommu_flb_obj *flb)
+{
+	struct iommu_device_ser *slot;
+	int slot_idx;
+
+	slot_idx = alloc_object_ser((struct iommu_array_hdr_ser **)&flb->curr_device_array,
+				    iommu_max_objs_per_page(flb->curr_device_array));
+	if (slot_idx < 0)
+		return ERR_PTR(slot_idx);
+
+	slot = &flb->curr_device_array->objects[slot_idx];
+	slot->hdr.ref_count = 1;
+	return slot;
+}
+
+/**
+ * iommu_preserve_device() - Preserve a PCI device and its IOMMU for liveupdate
+ * @domain: domain attached to @dev; its state must already be preserved
+ * @dev: device to preserve; only PCI devices are supported
+ * @preserved_state: on success, set to the physical address of the serialized
+ *                   device state
+ *
+ * The state of the IOMMU serving @dev is preserved as a dependency and is
+ * reference counted, so it is preserved by the driver only once.
+ *
+ * Return: 0 on success; -EOPNOTSUPP if @dev is not a PCI device or the IOMMU
+ * driver lacks any of the preserve/unpreserve ops; -EINVAL if @domain has no
+ * preserved state or iommu_group_dma_owner_claimed() is false for the group;
+ * -EBUSY if @dev already has outgoing preserved state; otherwise the error
+ * from the FLB lookup, the slot allocation or the driver callbacks.
+ */
+int iommu_preserve_device(struct iommu_domain *domain,
+			  struct device *dev, u64 *preserved_state)
+{
+	struct iommu_flb_obj *flb_obj;
+	struct iommu_device_ser *device_ser;
+	struct dev_iommu *iommu;
+	struct pci_dev *pdev;
+	int ret;
+
+	if (!dev_is_pci(dev))
+		return -EOPNOTSUPP;
+
+	if (!domain->preserved_state)
+		return -EINVAL;
+
+	if (!iommu_group_dma_owner_claimed(dev->iommu_group))
+		return -EINVAL;
+
+	pdev = to_pci_dev(dev);
+	iommu = dev->iommu;
+
+	/*
+	 * Require the unpreserve ops up front as well: the error path below
+	 * and iommu_unpreserve_device() depend on them, so a driver offering
+	 * only the preserve half could never be unwound.
+	 */
+	if (!iommu->iommu_dev->ops->preserve_device ||
+	    !iommu->iommu_dev->ops->unpreserve_device ||
+	    !iommu->iommu_dev->ops->preserve ||
+	    !iommu->iommu_dev->ops->unpreserve)
+		return -EOPNOTSUPP;
+
+	ret = liveupdate_flb_get_outgoing(&iommu_flb, (void **)&flb_obj);
+	if (ret)
+		return ret;
+
+	guard(mutex)(&flb_obj->lock);
+
+	/*
+	 * Preserving twice would overwrite dev->iommu->device_ser, leaking the
+	 * first slot and an extra reference on the IOMMU state.
+	 */
+	if (dev_iommu_preserved_state(dev))
+		return -EBUSY;
+
+	device_ser = alloc_iommu_device_ser(flb_obj);
+	if (IS_ERR(device_ser))
+		return PTR_ERR(device_ser);
+
+	ret = iommu_preserve_locked(iommu->iommu_dev, flb_obj);
+	if (ret) {
+		device_ser->hdr.deleted = true;
+		return ret;
+	}
+
+	/* Record the domain and IOMMU state this device depends on. */
+	device_ser->domain_iommu_ser.domain_phys = __pa(domain->preserved_state);
+	device_ser->domain_iommu_ser.iommu_phys = __pa(iommu->iommu_dev->outgoing_preserved_state);
+	device_ser->devid = pci_dev_id(pdev);
+	device_ser->pci_domain_nr = pci_domain_nr(pdev->bus);
+
+	ret = iommu->iommu_dev->ops->preserve_device(dev, device_ser);
+	if (ret) {
+		/* Unwind: retire the slot and drop the IOMMU reference. */
+		device_ser->hdr.deleted = true;
+		iommu_unpreserve_locked(iommu->iommu_dev, flb_obj);
+		return ret;
+	}
+
+	dev->iommu->device_ser = device_ser;
+	*preserved_state = virt_to_phys(device_ser);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(iommu_preserve_device);
+
+/**
+ * iommu_unpreserve_device() - Discard the preserved state of a device
+ * @domain: domain the device is attached to (not used by this function)
+ * @dev: device whose outgoing preserved state is discarded
+ *
+ * Returns without doing anything if @dev is not a PCI device, if
+ * iommu_group_dma_owner_claimed() is false for its group, or if the IOMMU
+ * driver lacks the unpreserve ops. Otherwise the driver's
+ * ->unpreserve_device() op is called, the serialized slot is marked deleted
+ * and one reference on the IOMMU hardware state is dropped.
+ */
+void iommu_unpreserve_device(struct iommu_domain *domain, struct device *dev)
+{
+	struct iommu_flb_obj *flb_obj;
+	struct iommu_device_ser *iommu_device_ser;
+	struct dev_iommu *iommu;
+	int ret;
+
+	if (!dev_is_pci(dev))
+		return;
+
+	if (!iommu_group_dma_owner_claimed(dev->iommu_group))
+		return;
+
+	iommu = dev->iommu;
+	if (!iommu->iommu_dev->ops->unpreserve_device ||
+	    !iommu->iommu_dev->ops->unpreserve)
+		return;
+
+	ret = liveupdate_flb_get_outgoing(&iommu_flb, (void **)&flb_obj);
+	if (WARN_ON(ret))
+		return;
+
+	guard(mutex)(&flb_obj->lock);
+	iommu_device_ser = dev_iommu_preserved_state(dev);
+	if (WARN_ON(!iommu_device_ser))
+		return;
+
+	iommu->iommu_dev->ops->unpreserve_device(dev, iommu_device_ser);
+	/*
+	 * Retire the slot, as the error paths of iommu_preserve_device() and
+	 * iommu_unpreserve_locked() do for the slots they give up.
+	 */
+	iommu_device_ser->hdr.deleted = true;
+	dev->iommu->device_ser = NULL;
+
+	iommu_unpreserve_locked(iommu->iommu_dev, flb_obj);
+}
+EXPORT_SYMBOL_GPL(iommu_unpreserve_device);
diff --git a/include/linux/iommu-liveupdate.h b/include/linux/iommu-liveupdate.h
index 6019cfc27428..279c7ab04f09 100644
--- a/include/linux/iommu-liveupdate.h
+++ b/include/linux/iommu-liveupdate.h
@@ -8,14 +8,37 @@
#ifndef _LINUX_IOMMU_LIVEUPDATE_H
#define _LINUX_IOMMU_LIVEUPDATE_H
+#include <linux/device.h>
#include <linux/iommu.h>
#include <linux/liveupdate.h>
#include <linux/kho/abi/iommu.h>
#ifdef CONFIG_IOMMU_LIVEUPDATE
+/*
+ * Return the serialized state attached to @dev's IOMMU data, unless that
+ * state is flagged as incoming. Returns NULL when the device has no IOMMU
+ * data or no such state.
+ */
+static inline void *dev_iommu_preserved_state(struct device *dev)
+{
+	struct iommu_device_ser *state;
+
+	state = dev->iommu ? dev->iommu->device_ser : NULL;
+	if (!state || state->hdr.incoming)
+		return NULL;
+
+	return state;
+}
+
int iommu_domain_preserve(struct iommu_domain *domain, struct iommu_domain_ser **ser);
void iommu_domain_unpreserve(struct iommu_domain *domain);
+int iommu_preserve_device(struct iommu_domain *domain,
+ struct device *dev, u64 *preserved_state);
+void iommu_unpreserve_device(struct iommu_domain *domain, struct device *dev);
#else
+static inline void *dev_iommu_preserved_state(struct device *dev)
+{
+ return NULL; /* stub for !CONFIG_IOMMU_LIVEUPDATE: always reports no state */
+}
+
static inline int iommu_domain_preserve(struct iommu_domain *domain, struct iommu_domain_ser **ser)
{
return -EOPNOTSUPP;
@@ -24,6 +47,16 @@ static inline int iommu_domain_preserve(struct iommu_domain *domain, struct iomm
static inline void iommu_domain_unpreserve(struct iommu_domain *domain)
{
}
+
+static inline int iommu_preserve_device(struct iommu_domain *domain,
+ struct device *dev, u64 *preserved_state)
+{
+ return -EOPNOTSUPP; /* stub for !CONFIG_IOMMU_LIVEUPDATE; *preserved_state is left untouched */
+}
+
+static inline void iommu_unpreserve_device(struct iommu_domain *domain, struct device *dev)
+{ /* stub for !CONFIG_IOMMU_LIVEUPDATE: intentionally empty */
+}
#endif
int iommu_liveupdate_register_flb(struct liveupdate_file_handler *handler);
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 3853a3946733..1c424b32c5fc 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -655,6 +655,10 @@ __iommu_copy_struct_to_user(const struct iommu_user_data *dst_data,
* resources shared/passed to user space IOMMU instance. Associate
* it with a nesting @parent_domain. It is required for driver to
* set @viommu->ops pointing to its own viommu_ops
+ * @preserve_device: Preserve the state of a device for liveupdate.
+ * @unpreserve_device: Unpreserve device state that was preserved earlier by
+ *                     @preserve_device.
+ * @preserve: Preserve the state of the IOMMU translation hardware for
+ *            liveupdate.
+ * @unpreserve: Unpreserve IOMMU hardware state that was preserved earlier by
+ *              @preserve.
* @owner: Driver module providing these ops
* @identity_domain: An always available, always attachable identity
* translation.
@@ -711,6 +715,13 @@ struct iommu_ops {
struct iommu_domain *parent_domain,
const struct iommu_user_data *user_data);
+#ifdef CONFIG_IOMMU_LIVEUPDATE
+ int (*preserve_device)(struct device *dev, struct iommu_device_ser *device_ser);
+ void (*unpreserve_device)(struct device *dev, struct iommu_device_ser *device_ser);
+ int (*preserve)(struct iommu_device *iommu, struct iommu_hw_ser *iommu_ser);
+ void (*unpreserve)(struct iommu_device *iommu, struct iommu_hw_ser *iommu_ser);
+#endif
+
const struct iommu_domain_ops *default_domain_ops;
struct module *owner;
struct iommu_domain *identity_domain;
@@ -806,6 +817,8 @@ struct iommu_domain_ops {
* @singleton_group: Used internally for drivers that have only one group
* @max_pasids: number of supported PASIDs
* @ready: set once iommu_device_register() has completed successfully
+ * @outgoing_preserved_state: preserved iommu state of outgoing kernel for
+ * liveupdate.
*/
struct iommu_device {
struct list_head list;
@@ -815,6 +828,10 @@ struct iommu_device {
struct iommu_group *singleton_group;
u32 max_pasids;
bool ready;
+
+#ifdef CONFIG_IOMMU_LIVEUPDATE
+ struct iommu_hw_ser *outgoing_preserved_state;
+#endif
};
/**
@@ -869,6 +886,9 @@ struct dev_iommu {
u32 pci_32bit_workaround:1;
u32 require_direct:1;
u32 shadow_on_flush:1;
+#ifdef CONFIG_IOMMU_LIVEUPDATE
+ struct iommu_device_ser *device_ser;
+#endif
};
int iommu_device_register(struct iommu_device *iommu,
--
2.54.0.545.g6539524ca2-goog