Re: [PATCH v10 10/19] vfio iommu: Add blocking notifier to notify DMA_UNMAP

From: Kirti Wankhede
Date: Fri Oct 28 2016 - 16:04:32 EST




On 10/28/2016 6:10 PM, Alex Williamson wrote:
> On Fri, 28 Oct 2016 15:33:58 +0800
> Jike Song <jike.song@xxxxxxxxx> wrote:
>
>> On 10/27/2016 05:29 AM, Kirti Wankhede wrote:
>>> Added blocking notifier to IOMMU TYPE1 driver to notify vendor drivers
>>> about DMA_UNMAP.
>>> Exported two APIs vfio_register_notifier() and vfio_unregister_notifier().
>>> Vendor driver should register notifer using these APIs.
>>> Vendor driver should use VFIO_IOMMU_NOTIFY_DMA_UNMAP action to invalidate
>>> mappings.
>>>
>>> Signed-off-by: Kirti Wankhede <kwankhede@xxxxxxxxxx>
>>> Signed-off-by: Neo Jia <cjia@xxxxxxxxxx>
>>> Change-Id: I5910d0024d6be87f3e8d3e0ca0eaeaaa0b17f271
>>> ---
>>> drivers/vfio/vfio.c | 73 +++++++++++++++++++++++++++++++++
>>> drivers/vfio/vfio_iommu_type1.c | 89 ++++++++++++++++++++++++++++++++++++-----
>>> include/linux/vfio.h | 11 +++++
>>> 3 files changed, 163 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
>>> index 28b50ca14c52..ff05ac6b1e90 100644
>>> --- a/drivers/vfio/vfio.c
>>> +++ b/drivers/vfio/vfio.c
>>> @@ -1891,6 +1891,79 @@ err_unpin_pages:
>>> }
>>> EXPORT_SYMBOL(vfio_unpin_pages);
>>>
>>> +int vfio_register_notifier(struct device *dev, struct notifier_block *nb)
>>> +{
>>> + struct vfio_container *container;
>>> + struct vfio_group *group;
>>> + struct vfio_iommu_driver *driver;
>>> + ssize_t ret;
>>> +
>>> + if (!dev || !nb)
>>> + return -EINVAL;
>>> +
>>> + group = vfio_group_get_from_dev(dev);
>>> + if (IS_ERR(group))
>>> + return PTR_ERR(group);
>>> +
>>> + ret = vfio_group_add_container_user(group);
>>> + if (ret)
>>> + goto err_register_nb;
>>> +
>>> + container = group->container;
>>> + down_read(&container->group_lock);
>>> +
>>> + driver = container->iommu_driver;
>>> + if (likely(driver && driver->ops->register_notifier))
>>> + ret = driver->ops->register_notifier(container->iommu_data, nb);
>>> + else
>>> + ret = -EINVAL;
>>> +
>>> + up_read(&container->group_lock);
>>> + vfio_group_try_dissolve_container(group);
>>> +
>>> +err_register_nb:
>>> + vfio_group_put(group);
>>> + return ret;
>>> +}
>>> +EXPORT_SYMBOL(vfio_register_notifier);
>>> +
>>> +int vfio_unregister_notifier(struct device *dev, struct notifier_block *nb)
>>> +{
>>> + struct vfio_container *container;
>>> + struct vfio_group *group;
>>> + struct vfio_iommu_driver *driver;
>>> + ssize_t ret;
>>> +
>>> + if (!dev || !nb)
>>> + return -EINVAL;
>>> +
>>> + group = vfio_group_get_from_dev(dev);
>>> + if (IS_ERR(group))
>>> + return PTR_ERR(group);
>>> +
>>> + ret = vfio_group_add_container_user(group);
>>> + if (ret)
>>> + goto err_unregister_nb;
>>> +
>>> + container = group->container;
>>> + down_read(&container->group_lock);
>>> +
>>> + driver = container->iommu_driver;
>>> + if (likely(driver && driver->ops->unregister_notifier))
>>> + ret = driver->ops->unregister_notifier(container->iommu_data,
>>> + nb);
>>> + else
>>> + ret = -EINVAL;
>>> +
>>> + up_read(&container->group_lock);
>>> + vfio_group_try_dissolve_container(group);
>>> +
>>> +err_unregister_nb:
>>> + vfio_group_put(group);
>>> + return ret;
>>> +}
>>> +EXPORT_SYMBOL(vfio_unregister_notifier);
>>> +
>>> /**
>>> * Module/class support
>>> */
>>> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
>>> index 5add11a147e1..a4bd331ac0fd 100644
>>> --- a/drivers/vfio/vfio_iommu_type1.c
>>> +++ b/drivers/vfio/vfio_iommu_type1.c
>>> @@ -37,6 +37,7 @@
>>> #include <linux/vfio.h>
>>> #include <linux/workqueue.h>
>>> #include <linux/mdev.h>
>>> +#include <linux/notifier.h>
>>>
>>> #define DRIVER_VERSION "0.2"
>>> #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@xxxxxxxxxx>"
>>> @@ -59,6 +60,7 @@ struct vfio_iommu {
>>> struct vfio_domain *external_domain; /* domain for external user */
>>> struct mutex lock;
>>> struct rb_root dma_list;
>>> + struct blocking_notifier_head notifier;
>>> bool v2;
>>> bool nesting;
>>> };
>>> @@ -549,7 +551,8 @@ static long vfio_iommu_type1_pin_pages(void *iommu_data,
>>>
>>> mutex_lock(&iommu->lock);
>>>
>>> - if (!iommu->external_domain) {
>>> + /* Fail if notifier list is empty */
>>> + if ((!iommu->external_domain) || (!iommu->notifier.head)) {
>>> ret = -EINVAL;
>>> goto pin_done;
>>> }
>>> @@ -768,6 +771,50 @@ static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu)
>>> return bitmap;
>>> }
>>>
>>> +/*
>>> + * This function finds pfn in domain->external_addr_space->pfn_list for given
>>> + * iova range. If pfn exist, notify pfn to registered notifier list. On
>>> + * receiving notifier callback, vendor driver should invalidate the mapping and
>>> + * call vfio_unpin_pages() to unpin this pfn. With that vfio_pfn for this pfn
>>> + * gets removed from rb tree of pfn_list. That re-arranges rb tree, so while
>>> + * searching for next vfio_pfn in rb tree, start search from first node again.
>>> + * If any vendor driver doesn't unpin that pfn, vfio_pfn would not get removed
>>> + * from rb tree and so in next search vfio_pfn would be same as previous
>>> + * vfio_pfn. In that case, exit from loop.
>>> + */
>>> +static void vfio_notifier_call_chain(struct vfio_iommu *iommu,
>>> + struct vfio_iommu_type1_dma_unmap *unmap)
>>> +{
>>> + struct vfio_domain *domain = iommu->external_domain;
>>> + struct rb_node *n;
>>> + struct vfio_pfn *vpfn = NULL, *prev_vpfn;
>>> +
>>> + do {
>>> + prev_vpfn = vpfn;
>>> + mutex_lock(&domain->external_addr_space->pfn_list_lock);
>>> +
>>> + n = rb_first(&domain->external_addr_space->pfn_list);
>>> +
>>> + for (; n; n = rb_next(n), vpfn = NULL) {
>>> + vpfn = rb_entry(n, struct vfio_pfn, node);
>>> +
>>> + if ((vpfn->iova >= unmap->iova) &&
>>> + (vpfn->iova < unmap->iova + unmap->size))
>>> + break;
>>> + }
>>> +
>>> + mutex_unlock(&domain->external_addr_space->pfn_list_lock);
>>> +
>>> + /* Notify any listeners about DMA_UNMAP */
>>> + if (vpfn)
>>> + blocking_notifier_call_chain(&iommu->notifier,
>>> + VFIO_IOMMU_NOTIFY_DMA_UNMAP,
>>> + &vpfn->pfn);
>>
>> Hi Kirti,
>>
>> The information carried by notifier is only a pfn.
>>
>> Since your pin/unpin interfaces design, it's the vendor driver who should
>> guarantee pin/unpin same times. To achieve that, the vendor driver must
>> cache it's iova->pfn mapping on its side, to avoid pinning a same page
>> for multiple times.
>>
>> With the notifier carrying only a pfn, to find the iova by this pfn,
>> the vendor driver must *also* keep a reverse-mapping. That's a bit
>> too much.
>>
>> Since the vendor could also suffer from IOMMU-compatible problem,
>> which means a local cache is always helpful, so I'd like to have the
>> iova carried to the notifier.
>>
>> What'd you say?
>
> I agree, the pfn is not unique, multiple guest pfns (iovas) might be
> backed by the same host pfn. DMA_UNMAP calls are based on iova, the
> notifier through to the vendor driver must be based on the same.

Host pfn should be unique, right?

Kirti.