Re: [PATCH 01/14] iommu: Implement IOMMU LU FLB callbacks
From: Vipin Sharma
Date: Mon Mar 16 2026 - 18:56:40 EST
On Tue, Feb 03, 2026 at 10:09:35PM +0000, Samiullah Khawaja wrote:
> +config IOMMU_LIVEUPDATE
> + bool "IOMMU live update state preservation support"
> + depends on LIVEUPDATE && IOMMUFD
> + help
> + Enable support for preserving IOMMU state across a kexec live update.
> +
> + This allows devices managed by iommufd to maintain their DMA mappings
> + during kexec base kernel update.
> +
> + If unsure, say N.
> +
Do we need a separate config? Can't we just use CONFIG_LIVEUPDATE?
> menuconfig IOMMU_SUPPORT
> bool "IOMMU Hardware Support"
> depends on MMU
> diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
> index 0275821f4ef9..b3715c5a6b97 100644
> --- a/drivers/iommu/Makefile
> +++ b/drivers/iommu/Makefile
> @@ -15,6 +15,7 @@ obj-$(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) += io-pgtable-arm-v7s.o
> obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o
> obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE_KUNIT_TEST) += io-pgtable-arm-selftests.o
> obj-$(CONFIG_IOMMU_IO_PGTABLE_DART) += io-pgtable-dart.o
> +obj-$(CONFIG_IOMMU_LIVEUPDATE) += liveupdate.o
It seems like there is a sorted order for CONFIG_IOMMU_* in the
Makefile, lets keep it same if possible.
> +static void iommu_liveupdate_free_objs(u64 next, bool incoming)
> +{
> + struct iommu_objs_ser *objs;
> +
> + while (next) {
> + objs = __va(next);
There is also call to phys_to_virt() in other functions in this patch.
Should we use the same here to be consistent?
> + next = objs->next_objs;
> +
> + if (!incoming)
> + kho_unpreserve_free(objs);
> + else
> + folio_put(virt_to_folio(objs));
> + }
> +}
Instead of passing boolean, and calling with different arguments, I
think it will be simpler to just have two functions
- iommu_liveupdate_unpreserve()
- iommu_liveupdate_folio_put()
> +
> +static void iommu_liveupdate_flb_free(struct iommu_lu_flb_obj *obj)
> +{
> + if (obj->iommu_domains)
> + iommu_liveupdate_free_objs(obj->ser->iommu_domains_phys, false);
> +
> + if (obj->devices)
> + iommu_liveupdate_free_objs(obj->ser->devices_phys, false);
> +
> + if (obj->iommus)
> + iommu_liveupdate_free_objs(obj->ser->iommus_phys, false);
> +
> + kho_unpreserve_free(obj->ser);
> + kfree(obj);
> +}
> +
> +static int iommu_liveupdate_flb_preserve(struct liveupdate_flb_op_args *argp)
> +{
> + struct iommu_lu_flb_obj *obj;
> + struct iommu_lu_flb_ser *ser;
> + void *mem;
> +
> + obj = kzalloc(sizeof(*obj), GFP_KERNEL);
> + if (!obj)
> + return -ENOMEM;
> +
> + mutex_init(&obj->lock);
> + mem = kho_alloc_preserve(sizeof(*ser));
> + if (IS_ERR(mem))
> + goto err_free;
> +
> + ser = mem;
> + obj->ser = ser;
> +
> + mem = kho_alloc_preserve(PAGE_SIZE);
> + if (IS_ERR(mem))
> + goto err_free;
> +
> + obj->iommu_domains = mem;
> + ser->iommu_domains_phys = virt_to_phys(obj->iommu_domains);
> +
> + mem = kho_alloc_preserve(PAGE_SIZE);
> + if (IS_ERR(mem))
> + goto err_free;
> +
> + obj->devices = mem;
> + ser->devices_phys = virt_to_phys(obj->devices);
> +
> + mem = kho_alloc_preserve(PAGE_SIZE);
> + if (IS_ERR(mem))
> + goto err_free;
> +
> + obj->iommus = mem;
> + ser->iommus_phys = virt_to_phys(obj->iommus);
> +
> + argp->obj = obj;
> + argp->data = virt_to_phys(ser);
> + return 0;
> +
> +err_free:
> + iommu_liveupdate_flb_free(obj);
Generally, I have seen in the function goto will call corresponding
error tags, and free corresponding allocations and all the one which
happend before. It is easier to read code that way. I know you are
combining the free call from iommu_liveupdate_flb_unpreserve() also.
IMHO, code readability will be better this way.
> + return PTR_ERR(mem);
> +}
> +
> +static void iommu_liveupdate_flb_unpreserve(struct liveupdate_flb_op_args *argp)
> +{
> + iommu_liveupdate_flb_free(argp->obj);
> +}
> +
> +static void iommu_liveupdate_flb_finish(struct liveupdate_flb_op_args *argp)
> +{
> + struct iommu_lu_flb_obj *obj = argp->obj;
> +
> + if (obj->iommu_domains)
> + iommu_liveupdate_free_objs(obj->ser->iommu_domains_phys, true);
Can there be the case where obj->iommu_domains is NULL but
obj->ser->iommu_domains_phys is not? If that is not possible, I will
just simplify the patch and unconditionally call
iommu_liveupdate_free_objs()?
> +
> +static int iommu_liveupdate_flb_retrieve(struct liveupdate_flb_op_args *argp)
> +{
> + struct iommu_lu_flb_obj *obj;
> + struct iommu_lu_flb_ser *ser;
> +
> + obj = kzalloc(sizeof(*obj), GFP_ATOMIC);
> + if (!obj)
> + return -ENOMEM;
Is kzalloc() failure here recoverable whereas iommu_liveupdate_restore_objs()
below is not? If it is not recoverable should there be a BUG_ON here?
> +
> + mutex_init(&obj->lock);
> + BUG_ON(!kho_restore_folio(argp->data));
> + ser = phys_to_virt(argp->data);
> + obj->ser = ser;
> +
> + iommu_liveupdate_restore_objs(ser->iommu_domains_phys);
> + obj->iommu_domains = phys_to_virt(ser->iommu_domains_phys);
Can iommu_liveupdate_restore_obj() just return virtual address and we
can simplify code to:
obj->iommu_domains = iommu_liveupdate_restore_objs(ser->iommu_domains_phys);
> +
> + iommu_liveupdate_restore_objs(ser->devices_phys);
> + obj->devices = phys_to_virt(ser->devices_phys);
> +
> + iommu_liveupdate_restore_objs(ser->iommus_phys);
> + obj->iommus = phys_to_virt(ser->iommus_phys);
> +
> + argp->obj = obj;
> +
> + return 0;
> +}
> +
> diff --git a/include/linux/iommu-lu.h b/include/linux/iommu-lu.h
I will recommend to use full name and not short "lu". iommu-liveupdate.h
seems more readable and not too long.
> +#define MAX_IOMMU_SERS ((PAGE_SIZE - sizeof(struct iommus_ser)) / sizeof(struct iommu_ser))
> +#define MAX_IOMMU_DOMAIN_SERS \
> + ((PAGE_SIZE - sizeof(struct iommu_domains_ser)) / sizeof(struct iommu_domain_ser))
> +#define MAX_DEVICE_SERS ((PAGE_SIZE - sizeof(struct devices_ser)) / sizeof(struct device_ser))
This is per page limit, not whole serialization limit. May be we can
name something like:
- MAX_IOMMU_SERS_PER_PAGE, or
- MAX_IOMMU_SERS_PAGE_CAPACITY
> +
> +struct iommu_lu_flb_obj {
> + struct mutex lock;
> + struct iommu_lu_flb_ser *ser;
> +
> + struct iommu_domains_ser *iommu_domains;
> + struct iommus_ser *iommus;
> + struct devices_ser *devices;
> +} __packed;
> +
I think naming scheme used here is little hard to absorb when we have so
many individual structs in this header file. Specifically, struct names like:
- iommu_domains_ser vs iommu_domain_ser
- iommus_ser vs iommu_ser
- devices_ser vs device_ser
- iommu_objs_ser vs iommu_obj_ser
First three are showing container and its elements relation, however,
last one doesn't have that relation but naming is same there.
I will recommend to change the naming scheme of containers to something like:
struct iommu_domain_ser_[hdr|header|table|arr] {};
struct iommu_ser_hdr {}
struct device_ser_hdr {}
Individual element of container can be same.
For objs, something like:
iommu_objs_ser -> iommu_hdr_meta