Re: [PATCH v13 11/22] vfio iommu: Add blocking notifier to notify DMA_UNMAP

From: Alex Williamson
Date: Tue Nov 15 2016 - 22:16:21 EST


On Wed, 16 Nov 2016 08:16:15 +0530
Kirti Wankhede <kwankhede@xxxxxxxxxx> wrote:

> On 11/16/2016 3:49 AM, Alex Williamson wrote:
> > On Tue, 15 Nov 2016 20:59:54 +0530
> > Kirti Wankhede <kwankhede@xxxxxxxxxx> wrote:
> >
> ...
>
> >> @@ -854,7 +857,28 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
> >>  		 */
> >>  		if (dma->task->mm != current->mm)
> >>  			break;
> >> +
> >>  		unmapped += dma->size;
> >> +
> >> +		if (iommu->external_domain && !RB_EMPTY_ROOT(&dma->pfn_list)) {
> >> +			struct vfio_iommu_type1_dma_unmap nb_unmap;
> >> +
> >> +			nb_unmap.iova = dma->iova;
> >> +			nb_unmap.size = dma->size;
> >> +
> >> +			/*
> >> +			 * Notifier callback would call vfio_unpin_pages() which
> >> +			 * would acquire iommu->lock. Release lock here and
> >> +			 * reacquire it again.
> >> +			 */
> >> +			mutex_unlock(&iommu->lock);
> >> +			blocking_notifier_call_chain(&iommu->notifier,
> >> +						     VFIO_IOMMU_NOTIFY_DMA_UNMAP,
> >> +						     &nb_unmap);
> >> +			mutex_lock(&iommu->lock);
> >> +			if (WARN_ON(!RB_EMPTY_ROOT(&dma->pfn_list)))
> >> +				break;
> >> +		}
> >
> >
> > Why exactly do we need to notify per vfio_dma rather than per unmap
> > request? If we do the latter we can send the notify first, limiting us
> > to races where a page is pinned between the notify and the locking,
> > whereas here even our dma pointer is suspect once we re-acquire the
> > lock; we don't technically know whether another unmap could have
> > removed it already. Perhaps something like this (untested):
> >
>
> There are checks to validate the unmap request, like the v2 check and
> who is calling unmap and whether that task is allowed to unmap. Until
> those checks pass, it isn't certain that the whole requested range will
> actually be unmapped. The notify call should be at the place where it
> is certain that the range passed to the notifier is definitely going
> to be removed. My change does that.
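
A note for readers following the thread: the consumer of this notifier is
the mdev vendor driver, which has to drop its page pins when a range it
pinned goes away. Purely as an illustration (the my_vendor_* names below
are hypothetical; only VFIO_IOMMU_NOTIFY_DMA_UNMAP and struct
vfio_iommu_type1_dma_unmap come from the series), such a callback could
look roughly like:

static int my_vendor_iommu_notifier(struct notifier_block *nb,
                                    unsigned long action, void *data)
{
        struct my_vendor_state *state =
                container_of(nb, struct my_vendor_state, iommu_nb);

        if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
                struct vfio_iommu_type1_dma_unmap *unmap = data;

                /*
                 * Hypothetical helper: unpin everything we pinned inside
                 * [unmap->iova, unmap->iova + unmap->size).  Unpinning
                 * (vfio_unpin_pages()) goes back into the type1 driver and
                 * takes iommu->lock, which is why vfio_dma_do_unmap() must
                 * drop the lock around blocking_notifier_call_chain().
                 */
                my_vendor_unpin_range(state, unmap->iova, unmap->size);
        }

        return NOTIFY_OK;
}

That re-entry into iommu->lock is what both mails above are wrestling with.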

Ok, but that doesn't solve the problem. What about this (untested):

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index ee9a680..50cafdf 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -782,9 +782,9 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
 			     struct vfio_iommu_type1_dma_unmap *unmap)
 {
 	uint64_t mask;
-	struct vfio_dma *dma;
+	struct vfio_dma *dma, *dma_last = NULL;
 	size_t unmapped = 0;
-	int ret = 0;
+	int ret = 0, retries;
 
 	mask = ((uint64_t)1 << __ffs(vfio_pgsize_bitmap(iommu))) - 1;
 
@@ -794,7 +794,7 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
 		return -EINVAL;
 
 	WARN_ON(mask & PAGE_MASK);
-
+again:
 	mutex_lock(&iommu->lock);
 
 	/*
@@ -851,11 +851,16 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
 		if (dma->task->mm != current->mm)
 			break;
 
-		unmapped += dma->size;
-
-		if (iommu->external_domain && !RB_EMPTY_ROOT(&dma->pfn_list)) {
+		if (!RB_EMPTY_ROOT(&dma->pfn_list)) {
 			struct vfio_iommu_type1_dma_unmap nb_unmap;
 
+			if (dma_last == dma) {
+				BUG_ON(++retries > 10);
+			} else {
+				dma_last = dma;
+				retries = 0;
+			}
+
 			nb_unmap.iova = dma->iova;
 			nb_unmap.size = dma->size;
 
@@ -868,11 +873,11 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
 			blocking_notifier_call_chain(&iommu->notifier,
 						     VFIO_IOMMU_NOTIFY_DMA_UNMAP,
 						     &nb_unmap);
-			mutex_lock(&iommu->lock);
-			if (WARN_ON(!RB_EMPTY_ROOT(&dma->pfn_list)))
-				break;
+			goto again;
 		}
+		unmapped += dma->size;
 		vfio_remove_dma(iommu, dma);
+
 	}
 
 unlock:
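
For context, the hunk above sits inside the unmap loop of
vfio_dma_do_unmap(), which looks up the vfio_dma afresh on every pass via
vfio_find_dma(). A rough sketch of the resulting control flow
(reconstructed from the surrounding code, not part of the patch):

again:
        mutex_lock(&iommu->lock);
        ...
        while ((dma = vfio_find_dma(iommu, unmap->iova, unmap->size))) {
                ...
                if (!RB_EMPTY_ROOT(&dma->pfn_list)) {
                        ...
                        mutex_unlock(&iommu->lock);
                        blocking_notifier_call_chain(&iommu->notifier,
                                                     VFIO_IOMMU_NOTIFY_DMA_UNMAP,
                                                     &nb_unmap);
                        /* dma may already be gone; start over */
                        goto again;
                }
                unmapped += dma->size;
                vfio_remove_dma(iommu, dma);
        }

Because the lookup is redone after re-taking the lock, the stale dma
pointer problem goes away; the dma_last/retries bookkeeping (BUG_ON after
10 passes on the same entry) only guards against a notifier consumer that
never actually unpins the range.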