Re: [PATCH v2 04/16] iommu: Implement device and IOMMU HW preservation

From: Baolu Lu

Date: Wed May 06 2026 - 22:10:37 EST


On 4/28/26 01:56, Samiullah Khawaja wrote:
Add IOMMU ops to preserve/unpreserve a device. These can be implemented
by IOMMU drivers that support preservation of devices whose IOMMU
domains are preserved. During device preservation, the state of the
associated IOMMU is also preserved as a dependency.

Signed-off-by: Samiullah Khawaja <skhawaja@xxxxxxxxxx>
---
drivers/iommu/liveupdate.c | 162 +++++++++++++++++++++++++++++++
include/linux/iommu-liveupdate.h | 33 +++++++
include/linux/iommu.h | 20 ++++
3 files changed, 215 insertions(+)

diff --git a/drivers/iommu/liveupdate.c b/drivers/iommu/liveupdate.c
index f71f14518248..765d042e22e3 100644
--- a/drivers/iommu/liveupdate.c
+++ b/drivers/iommu/liveupdate.c
@@ -11,6 +11,7 @@
#include <linux/liveupdate.h>
#include <linux/iommu-liveupdate.h>
#include <linux/iommu.h>
+#include <linux/pci.h>
#include <linux/errno.h>
#define iommu_max_objs_per_page(_array) \
@@ -293,3 +294,164 @@ void iommu_domain_unpreserve(struct iommu_domain *domain)
domain->preserved_state = NULL;
}
EXPORT_SYMBOL_GPL(iommu_domain_unpreserve);
+
+static struct iommu_hw_ser *alloc_iommu_hw_ser(struct iommu_flb_obj *flb)
+{
+ int idx;
+
+ idx = alloc_object_ser((struct iommu_array_hdr_ser **)&flb->curr_iommu_array,
+ iommu_max_objs_per_page(flb->curr_iommu_array));
+ if (idx < 0)
+ return ERR_PTR(idx);
+
+ flb->curr_iommu_array->objects[idx].hdr.ref_count = 1;
+ return &flb->curr_iommu_array->objects[idx];
+}
+
+static int iommu_preserve_locked(struct iommu_device *iommu,
+ struct iommu_flb_obj *flb_obj)
+{
+ struct iommu_hw_ser *iommu_hw_ser;
+ int ret;
+
+ if (!iommu->ops->preserve)
+ return -EOPNOTSUPP;
+
+ lockdep_assert_held(&flb_obj->lock);
+ if (iommu->outgoing_preserved_state) {
+ iommu->outgoing_preserved_state->hdr.ref_count++;
+ return 0;
+ }
+
+ iommu_hw_ser = alloc_iommu_hw_ser(flb_obj);
+ if (IS_ERR(iommu_hw_ser))
+ return PTR_ERR(iommu_hw_ser);
+
+ ret = iommu->ops->preserve(iommu, iommu_hw_ser);
+ if (ret) {
+ iommu_hw_ser->hdr.deleted = true;
+ return ret;
+ }
+
+ iommu->outgoing_preserved_state = iommu_hw_ser;
+ return ret;
+}
+
+static void iommu_unpreserve_locked(struct iommu_device *iommu,
+ struct iommu_flb_obj *flb_obj)
+{
+ struct iommu_hw_ser *iommu_hw_ser = iommu->outgoing_preserved_state;
+
+ lockdep_assert_held(&flb_obj->lock);
+ iommu_hw_ser->hdr.ref_count--;
+ if (iommu_hw_ser->hdr.ref_count)
+ return;
+
+ iommu->outgoing_preserved_state = NULL;
+ iommu->ops->unpreserve(iommu, iommu_hw_ser);
+ iommu_hw_ser->hdr.deleted = true;
+}
+
+static struct iommu_device_ser *alloc_iommu_device_ser(struct iommu_flb_obj *flb)
+{
+ int idx;
+
+ idx = alloc_object_ser((struct iommu_array_hdr_ser **)&flb->curr_device_array,
+ iommu_max_objs_per_page(flb->curr_device_array));
+ if (idx < 0)
+ return ERR_PTR(idx);
+
+ flb->curr_device_array->objects[idx].hdr.ref_count = 1;
+ return &flb->curr_device_array->objects[idx];
+}
+
+int iommu_preserve_device(struct iommu_domain *domain,
+ struct device *dev, u64 *preserved_state)
+{
+ struct iommu_flb_obj *flb_obj;
+ struct iommu_device_ser *device_ser;
+ struct dev_iommu *iommu;
+ struct pci_dev *pdev;
+ int ret;
+
+ if (!dev_is_pci(dev))
+ return -EOPNOTSUPP;
+
+ if (!domain->preserved_state)
+ return -EINVAL;
+
+ if (!iommu_group_dma_owner_claimed(dev->iommu_group))
+ return -EINVAL;
+
+ pdev = to_pci_dev(dev);
+ iommu = dev->iommu;
+ if (!iommu->iommu_dev->ops->preserve_device ||
+ !iommu->iommu_dev->ops->preserve)
+ return -EOPNOTSUPP;
+
+ ret = liveupdate_flb_get_outgoing(&iommu_flb, (void **)&flb_obj);
+ if (ret)
+ return ret;
+
+ guard(mutex)(&flb_obj->lock);
+ device_ser = alloc_iommu_device_ser(flb_obj);
+ if (IS_ERR(device_ser))
+ return PTR_ERR(device_ser);
+
+ ret = iommu_preserve_locked(iommu->iommu_dev, flb_obj);
+ if (ret) {
+ device_ser->hdr.deleted = true;
+ return ret;
+ }
+
+ device_ser->domain_iommu_ser.domain_phys = __pa(domain->preserved_state);
+ device_ser->domain_iommu_ser.iommu_phys = __pa(iommu->iommu_dev->outgoing_preserved_state);
+ device_ser->devid = pci_dev_id(pdev);
+ device_ser->pci_domain_nr = pci_domain_nr(pdev->bus);
+
+ ret = iommu->iommu_dev->ops->preserve_device(dev, device_ser);
+ if (ret) {
+ device_ser->hdr.deleted = true;
+ iommu_unpreserve_locked(iommu->iommu_dev, flb_obj);
+ return ret;
+ }
+
+ dev->iommu->device_ser = device_ser;
+ *preserved_state = virt_to_phys(device_ser);
+ return 0;
+}
+
+void iommu_unpreserve_device(struct iommu_domain *domain, struct device *dev)
+{
+ struct iommu_flb_obj *flb_obj;
+ struct iommu_device_ser *iommu_device_ser;
+ struct dev_iommu *iommu;
+ struct pci_dev *pdev;
+ int ret;
+
+ if (!dev_is_pci(dev))
+ return;
+
+ if (!iommu_group_dma_owner_claimed(dev->iommu_group))
+ return;
+
+ pdev = to_pci_dev(dev);
+ iommu = dev->iommu;
+ if (!iommu->iommu_dev->ops->unpreserve_device ||
+ !iommu->iommu_dev->ops->unpreserve)
+ return;

Is it considered a driver bug if it implements the preserve hooks but
not the unpreserve ones? That would at least cause a silent memory leak.
How about adding a WARN like this?

if (WARN_ON_ONCE(!iommu->iommu_dev->ops->unpreserve_device ||
!iommu->iommu_dev->ops->unpreserve))
return;

?

Thanks,
baolu