Re: [PATCH v10 10/19] vfio iommu: Add blocking notifier to notify DMA_UNMAP

From: Jike Song
Date: Fri Oct 28 2016 - 03:37:20 EST


On 10/27/2016 05:29 AM, Kirti Wankhede wrote:
> Added blocking notifier to IOMMU TYPE1 driver to notify vendor drivers
> about DMA_UNMAP.
> Exported two APIs vfio_register_notifier() and vfio_unregister_notifier().
> Vendor driver should register notifer using these APIs.
> Vendor driver should use VFIO_IOMMU_NOTIFY_DMA_UNMAP action to invalidate
> mappings.
>
> Signed-off-by: Kirti Wankhede <kwankhede@xxxxxxxxxx>
> Signed-off-by: Neo Jia <cjia@xxxxxxxxxx>
> Change-Id: I5910d0024d6be87f3e8d3e0ca0eaeaaa0b17f271
> ---
> drivers/vfio/vfio.c | 73 +++++++++++++++++++++++++++++++++
> drivers/vfio/vfio_iommu_type1.c | 89 ++++++++++++++++++++++++++++++++++++-----
> include/linux/vfio.h | 11 +++++
> 3 files changed, 163 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> index 28b50ca14c52..ff05ac6b1e90 100644
> --- a/drivers/vfio/vfio.c
> +++ b/drivers/vfio/vfio.c
> @@ -1891,6 +1891,79 @@ err_unpin_pages:
> }
> EXPORT_SYMBOL(vfio_unpin_pages);
>
> +int vfio_register_notifier(struct device *dev, struct notifier_block *nb)
> +{
> + struct vfio_container *container;
> + struct vfio_group *group;
> + struct vfio_iommu_driver *driver;
> + ssize_t ret;
> +
> + if (!dev || !nb)
> + return -EINVAL;
> +
> + group = vfio_group_get_from_dev(dev);
> + if (IS_ERR(group))
> + return PTR_ERR(group);
> +
> + ret = vfio_group_add_container_user(group);
> + if (ret)
> + goto err_register_nb;
> +
> + container = group->container;
> + down_read(&container->group_lock);
> +
> + driver = container->iommu_driver;
> + if (likely(driver && driver->ops->register_notifier))
> + ret = driver->ops->register_notifier(container->iommu_data, nb);
> + else
> + ret = -EINVAL;
> +
> + up_read(&container->group_lock);
> + vfio_group_try_dissolve_container(group);
> +
> +err_register_nb:
> + vfio_group_put(group);
> + return ret;
> +}
> +EXPORT_SYMBOL(vfio_register_notifier);
> +
> +int vfio_unregister_notifier(struct device *dev, struct notifier_block *nb)
> +{
> + struct vfio_container *container;
> + struct vfio_group *group;
> + struct vfio_iommu_driver *driver;
> + ssize_t ret;
> +
> + if (!dev || !nb)
> + return -EINVAL;
> +
> + group = vfio_group_get_from_dev(dev);
> + if (IS_ERR(group))
> + return PTR_ERR(group);
> +
> + ret = vfio_group_add_container_user(group);
> + if (ret)
> + goto err_unregister_nb;
> +
> + container = group->container;
> + down_read(&container->group_lock);
> +
> + driver = container->iommu_driver;
> + if (likely(driver && driver->ops->unregister_notifier))
> + ret = driver->ops->unregister_notifier(container->iommu_data,
> + nb);
> + else
> + ret = -EINVAL;
> +
> + up_read(&container->group_lock);
> + vfio_group_try_dissolve_container(group);
> +
> +err_unregister_nb:
> + vfio_group_put(group);
> + return ret;
> +}
> +EXPORT_SYMBOL(vfio_unregister_notifier);
> +
> /**
> * Module/class support
> */
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 5add11a147e1..a4bd331ac0fd 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -37,6 +37,7 @@
> #include <linux/vfio.h>
> #include <linux/workqueue.h>
> #include <linux/mdev.h>
> +#include <linux/notifier.h>
>
> #define DRIVER_VERSION "0.2"
> #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@xxxxxxxxxx>"
> @@ -59,6 +60,7 @@ struct vfio_iommu {
> struct vfio_domain *external_domain; /* domain for external user */
> struct mutex lock;
> struct rb_root dma_list;
> + struct blocking_notifier_head notifier;
> bool v2;
> bool nesting;
> };
> @@ -549,7 +551,8 @@ static long vfio_iommu_type1_pin_pages(void *iommu_data,
>
> mutex_lock(&iommu->lock);
>
> - if (!iommu->external_domain) {
> + /* Fail if notifier list is empty */
> + if ((!iommu->external_domain) || (!iommu->notifier.head)) {
> ret = -EINVAL;
> goto pin_done;
> }
> @@ -768,6 +771,50 @@ static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu)
> return bitmap;
> }
>
> +/*
> + * This function finds pfn in domain->external_addr_space->pfn_list for given
> + * iova range. If pfn exist, notify pfn to registered notifier list. On
> + * receiving notifier callback, vendor driver should invalidate the mapping and
> + * call vfio_unpin_pages() to unpin this pfn. With that vfio_pfn for this pfn
> + * gets removed from rb tree of pfn_list. That re-arranges rb tree, so while
> + * searching for next vfio_pfn in rb tree, start search from first node again.
> + * If any vendor driver doesn't unpin that pfn, vfio_pfn would not get removed
> + * from rb tree and so in next search vfio_pfn would be same as previous
> + * vfio_pfn. In that case, exit from loop.
> + */
> +static void vfio_notifier_call_chain(struct vfio_iommu *iommu,
> + struct vfio_iommu_type1_dma_unmap *unmap)
> +{
> + struct vfio_domain *domain = iommu->external_domain;
> + struct rb_node *n;
> + struct vfio_pfn *vpfn = NULL, *prev_vpfn;
> +
> + do {
> + prev_vpfn = vpfn;
> + mutex_lock(&domain->external_addr_space->pfn_list_lock);
> +
> + n = rb_first(&domain->external_addr_space->pfn_list);
> +
> + for (; n; n = rb_next(n), vpfn = NULL) {
> + vpfn = rb_entry(n, struct vfio_pfn, node);
> +
> + if ((vpfn->iova >= unmap->iova) &&
> + (vpfn->iova < unmap->iova + unmap->size))
> + break;
> + }
> +
> + mutex_unlock(&domain->external_addr_space->pfn_list_lock);
> +
> + /* Notify any listeners about DMA_UNMAP */
> + if (vpfn)
> + blocking_notifier_call_chain(&iommu->notifier,
> + VFIO_IOMMU_NOTIFY_DMA_UNMAP,
> + &vpfn->pfn);

Hi Kirti,

The information carried by notifier is only a pfn.

Since your pin/unpin interfaces design, it's the vendor driver who should
guarantee pin/unpin same times. To achieve that, the vendor driver must
cache it's iova->pfn mapping on its side, to avoid pinning a same page
for multiple times.

With the notifier carrying only a pfn, to find the iova by this pfn,
the vendor driver must *also* keep a reverse-mapping. That's a bit
too much.

Since the vendor could also suffer from IOMMU-compatible problem,
which means a local cache is always helpful, so I'd like to have the
iova carried to the notifier.

What'd you say?

--
Thanks,
Jike

> + } while (vpfn && (prev_vpfn != vpfn));
> +
> + WARN_ON(vpfn);
> +}
> +
> static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
> struct vfio_iommu_type1_dma_unmap *unmap)
> {
> @@ -844,6 +891,9 @@ unlock:
> /* Report how much was unmapped */
> unmap->size = unmapped;
>
> + if (unmapped && iommu->external_domain)
> + vfio_notifier_call_chain(iommu, unmap);
> +
> return ret;
> }
>
> @@ -1418,6 +1468,7 @@ static void *vfio_iommu_type1_open(unsigned long arg)
> INIT_LIST_HEAD(&iommu->domain_list);
> iommu->dma_list = RB_ROOT;
> mutex_init(&iommu->lock);
> + BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier);
>
> return iommu;
> }
> @@ -1555,16 +1606,34 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
> return -ENOTTY;
> }
>
> +static int vfio_iommu_type1_register_notifier(void *iommu_data,
> + struct notifier_block *nb)
> +{
> + struct vfio_iommu *iommu = iommu_data;
> +
> + return blocking_notifier_chain_register(&iommu->notifier, nb);
> +}
> +
> +static int vfio_iommu_type1_unregister_notifier(void *iommu_data,
> + struct notifier_block *nb)
> +{
> + struct vfio_iommu *iommu = iommu_data;
> +
> + return blocking_notifier_chain_unregister(&iommu->notifier, nb);
> +}
> +
> static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = {
> - .name = "vfio-iommu-type1",
> - .owner = THIS_MODULE,
> - .open = vfio_iommu_type1_open,
> - .release = vfio_iommu_type1_release,
> - .ioctl = vfio_iommu_type1_ioctl,
> - .attach_group = vfio_iommu_type1_attach_group,
> - .detach_group = vfio_iommu_type1_detach_group,
> - .pin_pages = vfio_iommu_type1_pin_pages,
> - .unpin_pages = vfio_iommu_type1_unpin_pages,
> + .name = "vfio-iommu-type1",
> + .owner = THIS_MODULE,
> + .open = vfio_iommu_type1_open,
> + .release = vfio_iommu_type1_release,
> + .ioctl = vfio_iommu_type1_ioctl,
> + .attach_group = vfio_iommu_type1_attach_group,
> + .detach_group = vfio_iommu_type1_detach_group,
> + .pin_pages = vfio_iommu_type1_pin_pages,
> + .unpin_pages = vfio_iommu_type1_unpin_pages,
> + .register_notifier = vfio_iommu_type1_register_notifier,
> + .unregister_notifier = vfio_iommu_type1_unregister_notifier,
> };
>
> static int __init vfio_iommu_type1_init(void)
> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> index 0609a2052846..4c91ce8bfaeb 100644
> --- a/include/linux/vfio.h
> +++ b/include/linux/vfio.h
> @@ -80,6 +80,10 @@ struct vfio_iommu_driver_ops {
> unsigned long *phys_pfn);
> long (*unpin_pages)(void *iommu_data, unsigned long *pfn,
> long npage);
> + int (*register_notifier)(void *iommu_data,
> + struct notifier_block *nb);
> + int (*unregister_notifier)(void *iommu_data,
> + struct notifier_block *nb);
> };
>
> extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops);
> @@ -137,6 +141,13 @@ extern long vfio_pin_pages(struct device *dev, unsigned long *user_pfn,
> extern long vfio_unpin_pages(struct device *dev, unsigned long *pfn,
> long npage);
>
> +#define VFIO_IOMMU_NOTIFY_DMA_UNMAP 1
> +
> +extern int vfio_register_notifier(struct device *dev,
> + struct notifier_block *nb);
> +
> +extern int vfio_unregister_notifier(struct device *dev,
> + struct notifier_block *nb);
> /*
> * IRQfd - generic
> */
>