Re: [PATCH kernel v8 07/31] vfio: powerpc/spapr: Moving pinning/unpinning to helpers

From: David Gibson
Date: Wed Apr 15 2015 - 04:33:39 EST


On Fri, Apr 10, 2015 at 04:30:49PM +1000, Alexey Kardashevskiy wrote:
> This is a pretty mechanical patch to make next patches simpler.
>
> New tce_iommu_unuse_page() helper does put_page() now but it might skip
> that after the memory registering patch applied.
>
> As we are here, this removes unnecessary checks for a value returned
> by pfn_to_page() as it cannot possibly return NULL.
>
> This moves tce_iommu_disable() later to let tce_iommu_clear() know if
> the container has been enabled because if it has not been, then
> put_page() must not be called on TCEs from the TCE table. This situation
> is not yet possible but it will after KVM acceleration patchset is
> applied.
>
> Signed-off-by: Alexey Kardashevskiy <aik@xxxxxxxxx>
> ---
> Changes:
> v6:
> * tce_get_hva() returns hva via a pointer
> ---
> drivers/vfio/vfio_iommu_spapr_tce.c | 68 +++++++++++++++++++++++++++----------
> 1 file changed, 50 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
> index c137bb3..ec5ee83 100644
> --- a/drivers/vfio/vfio_iommu_spapr_tce.c
> +++ b/drivers/vfio/vfio_iommu_spapr_tce.c
> @@ -196,7 +196,6 @@ static void tce_iommu_release(void *iommu_data)
> struct iommu_table *tbl = container->tbl;
>
> WARN_ON(tbl && !tbl->it_group);
> - tce_iommu_disable(container);
>
> if (tbl) {
> tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
> @@ -204,63 +203,96 @@ static void tce_iommu_release(void *iommu_data)
> if (tbl->it_group)
> tce_iommu_detach_group(iommu_data, tbl->it_group);
> }
> +
> + tce_iommu_disable(container);
> +
> mutex_destroy(&container->lock);
>
> kfree(container);
> }
>
> +static void tce_iommu_unuse_page(struct tce_container *container,
> + unsigned long oldtce)
> +{
> + struct page *page;
> +
> + if (!(oldtce & (TCE_PCI_READ | TCE_PCI_WRITE)))
> + return;
> +
> + /*
> + * VFIO cannot map/unmap when a container is not enabled so
> + * we would not need this check but KVM could map/unmap and if
> + * this happened, we must not put pages as KVM does not get them as
> + * it expects memory pre-registation to do this part.
> + */
> + if (!container->enabled)
> + return;

This worries me a bit. How can whether the contained is enabled now
safely tell you whether get_page() at some earlier point in time?

> +
> + page = pfn_to_page(__pa(oldtce) >> PAGE_SHIFT);
> +
> + if (oldtce & TCE_PCI_WRITE)
> + SetPageDirty(page);
> +
> + put_page(page);
> +}
> +
> static int tce_iommu_clear(struct tce_container *container,
> struct iommu_table *tbl,
> unsigned long entry, unsigned long pages)
> {
> unsigned long oldtce;
> - struct page *page;
>
> for ( ; pages; --pages, ++entry) {
> oldtce = iommu_clear_tce(tbl, entry);
> if (!oldtce)
> continue;
>
> - page = pfn_to_page(oldtce >> PAGE_SHIFT);
> - WARN_ON(!page);
> - if (page) {
> - if (oldtce & TCE_PCI_WRITE)
> - SetPageDirty(page);
> - put_page(page);
> - }
> + tce_iommu_unuse_page(container, (unsigned long) __va(oldtce));
> }
>
> return 0;
> }
>
> +static int tce_get_hva(unsigned long tce, unsigned long *hva)
> +{
> + struct page *page = NULL;
> + enum dma_data_direction direction = iommu_tce_direction(tce);
> +
> + if (get_user_pages_fast(tce & PAGE_MASK, 1,
> + direction != DMA_TO_DEVICE, &page) != 1)
> + return -EFAULT;
> +
> + *hva = (unsigned long) page_address(page);
> +
> + return 0;
> +}

I'd prefer to see this called tce_iommu_use_page() for symmetry.

> +
> static long tce_iommu_build(struct tce_container *container,
> struct iommu_table *tbl,
> unsigned long entry, unsigned long tce, unsigned long pages)
> {
> long i, ret = 0;
> - struct page *page = NULL;
> + struct page *page;
> unsigned long hva;
> enum dma_data_direction direction = iommu_tce_direction(tce);
>
> for (i = 0; i < pages; ++i) {
> - ret = get_user_pages_fast(tce & PAGE_MASK, 1,
> - direction != DMA_TO_DEVICE, &page);
> - if (unlikely(ret != 1)) {
> - ret = -EFAULT;
> + ret = tce_get_hva(tce, &hva);
> + if (ret)
> break;
> - }
>
> + page = pfn_to_page(__pa(hva) >> PAGE_SHIFT);
> if (!tce_page_is_contained(page, tbl->it_page_shift)) {
> ret = -EPERM;
> break;
> }
>
> - hva = (unsigned long) page_address(page) +
> - (tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK);
> + /* Preserve offset within IOMMU page */
> + hva |= tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
>
> ret = iommu_tce_build(tbl, entry + i, hva, direction);
> if (ret) {
> - put_page(page);
> + tce_iommu_unuse_page(container, hva);
> pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
> __func__, entry << tbl->it_page_shift,
> tce, ret);

--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson

Attachment: pgpf1CUybTgtP.pgp
Description: PGP signature