Re: [PATCH V4 03/10] iommufd: Allow binding to a noiommu device

From: Jacob Pan

Date: Wed May 06 2026 - 14:53:33 EST

Hi Yi,

On Tue, 28 Apr 2026 15:45:56 +0800
Yi Liu <yi.l.liu@xxxxxxxxx> wrote:

> On 4/15/26 05:14, Jacob Pan wrote:
> > From: Jason Gunthorpe <jgg@xxxxxxxxxx>
> >
> > Allow iommufd to bind devices without an IOMMU (noiommu mode) by
> > creating a dummy IOMMU group for such devices and skipping hwpt
> > operations.
> >
> > This enables noiommu devices to operate through the same iommufd
> > API as IOMMU- capable devices.
> >
> > Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxx>
> > Signed-off-by: Jacob Pan <jacob.pan@xxxxxxxxxxxxxxxxxxx>
> >
> > ---
> > v4:
> > - handle partially initialized idev w/o igroup (Sashiko)
> > v3:
> > - handle new error cases iommufd_device_bind. (Mostafa)
> > ---
> > drivers/iommu/iommufd/device.c | 132
> > +++++++++++++++++++++++---------- 1 file changed, 93 insertions(+),
> > 39 deletions(-)
> >
> > diff --git a/drivers/iommu/iommufd/device.c
> > b/drivers/iommu/iommufd/device.c index 2f30ea069f66..0283ac39be55
> > 100644 --- a/drivers/iommu/iommufd/device.c
> > +++ b/drivers/iommu/iommufd/device.c
> > @@ -23,6 +23,11 @@ struct iommufd_attach {
> > struct xarray device_array;
> > };
> >
> > +static bool is_vfio_noiommu(struct iommufd_device *idev)
>
> perhaps iommufd_device_is_noiommu().
>
sounds good, remove vfio.

> > +{
> > + return !device_iommu_mapped(idev->dev) ||
> > !idev->dev->iommu; +}
> > +
> > static void iommufd_group_release(struct kref *kref)
> > {
> > struct iommufd_group *igroup =
> > @@ -30,9 +35,11 @@ static void iommufd_group_release(struct kref
> > *kref)
> > WARN_ON(!xa_empty(&igroup->pasid_attach));
> >
> > - xa_cmpxchg(&igroup->ictx->groups,
> > iommu_group_id(igroup->group), igroup,
> > - NULL, GFP_KERNEL);
> > - iommu_group_put(igroup->group);
> > + if (igroup->group) {
> > + xa_cmpxchg(&igroup->ictx->groups,
> > iommu_group_id(igroup->group),
> > + igroup, NULL, GFP_KERNEL);
> > + iommu_group_put(igroup->group);
> > + }
> > mutex_destroy(&igroup->lock);
> > kfree(igroup);
> > }
> > @@ -204,32 +211,19 @@ void iommufd_device_destroy(struct
> > iommufd_object *obj) struct iommufd_device *idev =
> > container_of(obj, struct iommufd_device, obj);
> >
> > - iommu_device_release_dma_owner(idev->dev);
> > + if (!idev->igroup)
> > + return;
> > + if (!is_vfio_noiommu(idev))
> > + iommu_device_release_dma_owner(idev->dev);
> > iommufd_put_group(idev->igroup);
> > if (!iommufd_selftest_is_mock_dev(idev->dev))
> > iommufd_ctx_put(idev->ictx);
> > }
> >
> > -/**
> > - * iommufd_device_bind - Bind a physical device to an iommu fd
> > - * @ictx: iommufd file descriptor
> > - * @dev: Pointer to a physical device struct
> > - * @id: Output ID number to return to userspace for this device
> > - *
> > - * A successful bind establishes an ownership over the device and
> > returns
> > - * struct iommufd_device pointer, otherwise returns error pointer.
> > - *
> > - * A driver using this API must set driver_managed_dma and must
> > not touch
> > - * the device until this routine succeeds and establishes
> > ownership.
> > - *
> > - * Binding a PCI device places the entire RID under iommufd
> > control.
> > - *
> > - * The caller must undo this with iommufd_device_unbind()
> > - */
> > -struct iommufd_device *iommufd_device_bind(struct iommufd_ctx
> > *ictx,
> > - struct device *dev, u32
> > *id) +static int iommufd_bind_iommu(struct iommufd_device *idev)
> > {
> > - struct iommufd_device *idev;
> > + struct iommufd_ctx *ictx = idev->ictx;
> > + struct device *dev = idev->dev;
> > struct iommufd_group *igroup;
> > int rc;
> >
> > @@ -238,11 +232,11 @@ struct iommufd_device
> > *iommufd_device_bind(struct iommufd_ctx *ictx,
> > * to restore cache coherency.
> > */
> > if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY))
> > - return ERR_PTR(-EINVAL);
> > + return -EINVAL;
> >
> > - igroup = iommufd_get_group(ictx, dev);
> > + igroup = iommufd_get_group(idev->ictx, dev);
>
> no need as you already have a local ictx.
will do.

>
> > if (IS_ERR(igroup))
> > - return ERR_CAST(igroup);
> > + return PTR_ERR(igroup);
> >
> > /*
> > * For historical compat with VFIO the insecure interrupt
> > path is @@ -268,21 +262,69 @@ struct iommufd_device
> > *iommufd_device_bind(struct iommufd_ctx *ictx, if (rc)
> > goto out_group_put;
> >
> > + /* igroup refcount moves into iommufd_device */
> > + idev->igroup = igroup;
> > + return 0;
> > +
> > +out_group_put:
> > + iommufd_put_group(igroup);
> > + return rc;
> > +}
> > +
> > +/**
> > + * iommufd_device_bind - Bind a physical device to an iommu fd
> > + * @ictx: iommufd file descriptor
> > + * @dev: Pointer to a physical device struct
> > + * @id: Output ID number to return to userspace for this device
> > + *
> > + * A successful bind establishes an ownership over the device and
> > returns
> > + * struct iommufd_device pointer, otherwise returns error pointer.
> > + *
> > + * A driver using this API must set driver_managed_dma and must
> > not touch
> > + * the device until this routine succeeds and establishes
> > ownership.
> > + *
> > + * Binding a PCI device places the entire RID under iommufd
> > control.
> > + *
> > + * The caller must undo this with iommufd_device_unbind()
> > + */
> > +struct iommufd_device *iommufd_device_bind(struct iommufd_ctx
> > *ictx,
> > + struct device *dev, u32
> > *id) +{
> > + struct iommufd_device *idev;
> > + int rc;
> > +
> > idev = iommufd_object_alloc(ictx, idev,
> > IOMMUFD_OBJ_DEVICE);
> > - if (IS_ERR(idev)) {
> > - rc = PTR_ERR(idev);
> > - goto out_release_owner;
> > - }
> > + if (IS_ERR(idev))
> > + return idev;
> > +
> > idev->ictx = ictx;
> > - if (!iommufd_selftest_is_mock_dev(dev))
> > - iommufd_ctx_get(ictx);
> > idev->dev = dev;
> > idev->enforce_cache_coherency =
> > device_iommu_capable(dev,
> > IOMMU_CAP_ENFORCE_CACHE_COHERENCY);
>
> Functioanlly, it is harmless since device_iommu_capable() returns
> false for noiommu device. But conceptually, it is nice to move it to
> iommufd_bind_iommu().
make sense, will move it.
>
> > +
> > + if (!is_vfio_noiommu(idev)) {
> > + rc = iommufd_bind_iommu(idev);
> > + if (rc)
> > + goto err_out;
> > + } else {
> > + struct iommufd_group *igroup;
> > +
> > + /*
> > + * Create a dummy igroup, lots of stuff expects
> > ths igroup to be
> > + * present, but a NULL igroup->group is OK
> > + */
> > + igroup = iommufd_alloc_group(ictx, NULL);
> > + if (IS_ERR(igroup)) {
> > + rc = PTR_ERR(igroup);
> > + goto err_out;
> > + }
> > + idev->igroup = igroup;
> > + }
> > +
> > + if (!iommufd_selftest_is_mock_dev(dev))
> > + iommufd_ctx_get(ictx);
> > /* The calling driver is a user until
> > iommufd_device_unbind() */ refcount_inc(&idev->obj.users);
> > - /* igroup refcount moves into iommufd_device */
> > - idev->igroup = igroup;
> >
> > /*
> > * If the caller fails after this success it must call
> > @@ -294,11 +336,14 @@ struct iommufd_device
> > *iommufd_device_bind(struct iommufd_ctx *ictx, *id = idev->obj.id;
> > return idev;
> >
> > -out_release_owner:
> > - iommu_device_release_dma_owner(dev);
> > -out_group_put:
> > - iommufd_put_group(igroup);
> > +err_out:
> > + /*
> > + * Be careful that iommufd_device_destroy() can handle
> > partial
> > + * initialization.
> > + */
>
> no native speaker. This is a bit strange to me. I think the comment
> wants to highligh that iommufd_device_destroy() can handle partial
> initialization. But the above comment seems to warn something...
> how about below?
>
yes, that is better, This is not a warning, just explaining.

> /*
> * iommufd_device_destroy() handles partially initialized idev,
> * so iommufd_object_abort_and_destroy() is safe to call here.
> */
>
> > + iommufd_object_abort_and_destroy(ictx, &idev->obj);
> > return ERR_PTR(rc);
> > +
> > }
> > EXPORT_SYMBOL_NS_GPL(iommufd_device_bind, "IOMMUFD");
> >
> > @@ -512,6 +557,9 @@ static int iommufd_hwpt_attach_device(struct
> > iommufd_hw_pagetable *hwpt, struct iommufd_attach_handle *handle;
> > int rc;
> >
> > + if (is_vfio_noiommu(idev))
> > + return 0;
> > +
> > if (!iommufd_hwpt_compatible_device(hwpt, idev))
> > return -EINVAL;
> >
> > @@ -559,6 +607,9 @@ static void iommufd_hwpt_detach_device(struct
> > iommufd_hw_pagetable *hwpt, {
> > struct iommufd_attach_handle *handle;
> >
> > + if (is_vfio_noiommu(idev))
> > + return;
> > +
> > handle = iommufd_device_get_attach_handle(idev, pasid);
> > if (pasid == IOMMU_NO_PASID)
> > iommu_detach_group_handle(hwpt->domain,
> > idev->igroup->group); @@ -577,6 +628,9 @@ static int
> > iommufd_hwpt_replace_device(struct iommufd_device *idev, struct
> > iommufd_attach_handle *handle, *old_handle; int rc;
> >
> > + if (is_vfio_noiommu(idev))
> > + return 0;
> > +
> > if (!iommufd_hwpt_compatible_device(hwpt, idev))
> > return -EINVAL;
> >
> > @@ -652,7 +706,7 @@ int iommufd_hw_pagetable_attach(struct
> > iommufd_hw_pagetable *hwpt, goto err_release_devid;
> > }
> >
> > - if (attach_resv) {
> > + if (attach_resv && !is_vfio_noiommu(idev)) {
>
> how about the iommufd_hw_pagetable_detach() path? Does it also need to
> check noiommu device?
I don't think it is needed. iopt_remove_reserved_iova() is safe to call
when nothing was added — it's a no-op. But I can add the guard for
symmetry if preferred.

>
> > rc = iommufd_device_attach_reserved_iova(idev,
> > hwpt_paging); if (rc)
> > goto err_release_devid;
>
> Regards,
> Yi Liu