Re: [PATCH v2 04/16] iommu: Implement device and IOMMU HW preservation

From: Pranjal Shrivastava

Date: Mon May 18 2026 - 10:03:45 EST


On Mon, Apr 27, 2026 at 05:56:21PM +0000, Samiullah Khawaja wrote:
> Add IOMMU ops to preserve/unpreserve a device. These can be implemented
> by the IOMMU drivers that support preservation of devices that have
> their IOMMU domains preserved. During device preservation the state of
> the associated IOMMU is also preserved as dependency.
>
> Signed-off-by: Samiullah Khawaja <skhawaja@xxxxxxxxxx>
> ---
> drivers/iommu/liveupdate.c | 162 +++++++++++++++++++++++++++++++
> include/linux/iommu-liveupdate.h | 33 +++++++
> include/linux/iommu.h | 20 ++++
> 3 files changed, 215 insertions(+)
>
> diff --git a/drivers/iommu/liveupdate.c b/drivers/iommu/liveupdate.c
> index f71f14518248..765d042e22e3 100644
> --- a/drivers/iommu/liveupdate.c
> +++ b/drivers/iommu/liveupdate.c
> @@ -11,6 +11,7 @@
> #include <linux/liveupdate.h>
> #include <linux/iommu-liveupdate.h>
> #include <linux/iommu.h>
> +#include <linux/pci.h>
> #include <linux/errno.h>
>
> #define iommu_max_objs_per_page(_array) \
> @@ -293,3 +294,164 @@ void iommu_domain_unpreserve(struct iommu_domain *domain)
> domain->preserved_state = NULL;
> }
> EXPORT_SYMBOL_GPL(iommu_domain_unpreserve);
> +
> +static struct iommu_hw_ser *alloc_iommu_hw_ser(struct iommu_flb_obj *flb)
> +{
> + int idx;
> +
> + idx = alloc_object_ser((struct iommu_array_hdr_ser **)&flb->curr_iommu_array,
> + iommu_max_objs_per_page(flb->curr_iommu_array));

Nit: Same thing about brittle casts here, shall we make them void ** and
cast then within alloc_object_set ?


> + if (idx < 0)
> + return ERR_PTR(idx);
> +
> + flb->curr_iommu_array->objects[idx].hdr.ref_count = 1;
> + return &flb->curr_iommu_array->objects[idx];
> +}
> +
> +static int iommu_preserve_locked(struct iommu_device *iommu,
> + struct iommu_flb_obj *flb_obj)
> +{
> + struct iommu_hw_ser *iommu_hw_ser;
> + int ret;
> +
> + if (!iommu->ops->preserve)
> + return -EOPNOTSUPP;
> +
> + lockdep_assert_held(&flb_obj->lock);
> + if (iommu->outgoing_preserved_state) {
> + iommu->outgoing_preserved_state->hdr.ref_count++;
> + return 0;
> + }
> +
> + iommu_hw_ser = alloc_iommu_hw_ser(flb_obj);
> + if (IS_ERR(iommu_hw_ser))
> + return PTR_ERR(iommu_hw_ser);
> +
> + ret = iommu->ops->preserve(iommu, iommu_hw_ser);
> + if (ret) {
> + iommu_hw_ser->hdr.deleted = true;
> + return ret;
> + }
> +
> + iommu->outgoing_preserved_state = iommu_hw_ser;
> + return ret;
> +}
> +
> +static void iommu_unpreserve_locked(struct iommu_device *iommu,
> + struct iommu_flb_obj *flb_obj)
> +{
> + struct iommu_hw_ser *iommu_hw_ser = iommu->outgoing_preserved_state;
> +
> + lockdep_assert_held(&flb_obj->lock);
> + iommu_hw_ser->hdr.ref_count--;
> + if (iommu_hw_ser->hdr.ref_count)

Shall we add a defensive if (WARN_ON(!iommu_hw_ser)) ? I'm aware we
check this on within iommu_unpreserve_device() but we don't seem to
check it before calling iommu_unpreserve_locked() in the error path of
iommu_preserve_device.

> + return;
> +
> + iommu->outgoing_preserved_state = NULL;
> + iommu->ops->unpreserve(iommu, iommu_hw_ser);

We seem to assume we'll always have unpreserve implemented? If so, we
should check it during the iommu registration itself and fail it, i.e.

inside iommu_device_register() we could add something like:

#ifdef CONFIG_IOMMU_LIVEUPDATE
if ((iommu->ops->preserve && !iommu->ops->unpreserve) ||
(!iommu->ops->preserve && iommu->ops->unpreserve)) {
pr_err("IOMMU: %s: Asymmetric live-update operations detected\n",
dev_name(iommu->dev));
return -EINVAL;
}
#endif

This prevents a half-baked iommu driver from ever spinning up, completely
eliminating the need to check for it inside the live-update session paths.

> + iommu_hw_ser->hdr.deleted = true;
> +}
> +
> +static struct iommu_device_ser *alloc_iommu_device_ser(struct iommu_flb_obj *flb)
> +{
> + int idx;
> +
> + idx = alloc_object_ser((struct iommu_array_hdr_ser **)&flb->curr_device_array,

Nit: Same thing about brittle casts here, shall we make them void ** and
cast then within alloc_object_set ?

> + iommu_max_objs_per_page(flb->curr_device_array));
> + if (idx < 0)
> + return ERR_PTR(idx);
> +
> + flb->curr_device_array->objects[idx].hdr.ref_count = 1;
> + return &flb->curr_device_array->objects[idx];
> +}
> +
> +int iommu_preserve_device(struct iommu_domain *domain,
> + struct device *dev, u64 *preserved_state)
> +{
> + struct iommu_flb_obj *flb_obj;
> + struct iommu_device_ser *device_ser;
> + struct dev_iommu *iommu;
> + struct pci_dev *pdev;
> + int ret;
> +
> + if (!dev_is_pci(dev))
> + return -EOPNOTSUPP;
> +
> + if (!domain->preserved_state)
> + return -EINVAL;
> +
> + if (!iommu_group_dma_owner_claimed(dev->iommu_group))
> + return -EINVAL;

Nice.

> +
> + pdev = to_pci_dev(dev);
> + iommu = dev->iommu;
> + if (!iommu->iommu_dev->ops->preserve_device ||
> + !iommu->iommu_dev->ops->preserve)
> + return -EOPNOTSUPP;
> +
> + ret = liveupdate_flb_get_outgoing(&iommu_flb, (void **)&flb_obj);
> + if (ret)
> + return ret;
> +
> + guard(mutex)(&flb_obj->lock);
> + device_ser = alloc_iommu_device_ser(flb_obj);
> + if (IS_ERR(device_ser))
> + return PTR_ERR(device_ser);
> +
> + ret = iommu_preserve_locked(iommu->iommu_dev, flb_obj);
> + if (ret) {
> + device_ser->hdr.deleted = true;
> + return ret;
> + }
> +
> + device_ser->domain_iommu_ser.domain_phys = __pa(domain->preserved_state);
> + device_ser->domain_iommu_ser.iommu_phys = __pa(iommu->iommu_dev->outgoing_preserved_state);

Nit: Should these be updated to use virt_to_phys as well?

> + device_ser->devid = pci_dev_id(pdev);
> + device_ser->pci_domain_nr = pci_domain_nr(pdev->bus);
> +
> + ret = iommu->iommu_dev->ops->preserve_device(dev, device_ser);
> + if (ret) {
> + device_ser->hdr.deleted = true;
> + iommu_unpreserve_locked(iommu->iommu_dev, flb_obj);
> + return ret;
> + }
> +
> + dev->iommu->device_ser = device_ser;
> + *preserved_state = virt_to_phys(device_ser);
> + return 0;
> +}
> +

[...]

Thanks,
Praan