Re: [PATCH v12 05/22] vfio iommu: Added pin and unpin callback functions to vfio_iommu_driver_ops

From: Alex Williamson
Date: Mon Nov 14 2016 - 14:44:07 EST


On Mon, 14 Nov 2016 21:12:19 +0530
Kirti Wankhede <kwankhede@xxxxxxxxxx> wrote:

> Added APIs for pinning and unpinning a set of pages. These call back into
> the backend iommu module to actually pin and unpin pages.
> Added two new callback functions to struct vfio_iommu_driver_ops. A backend
> IOMMU module that supports pinning and unpinning pages for mdev devices
> should provide these functions.
>
> Renamed static functions in vfio_type1_iommu.c to resolve conflicts
>
> Signed-off-by: Kirti Wankhede <kwankhede@xxxxxxxxxx>
> Signed-off-by: Neo Jia <cjia@xxxxxxxxxx>
> Change-Id: Ia7417723aaae86bec2959ad9ae6c2915ddd340e0
> ---
> drivers/vfio/vfio.c | 103 ++++++++++++++++++++++++++++++++++++++++
> drivers/vfio/vfio_iommu_type1.c | 20 ++++----
> include/linux/vfio.h | 12 ++++-
> 3 files changed, 124 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> index 2e83bdf007fe..7dcfbca2016a 100644
> --- a/drivers/vfio/vfio.c
> +++ b/drivers/vfio/vfio.c
> @@ -1799,6 +1799,109 @@ void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
> }
> EXPORT_SYMBOL_GPL(vfio_info_cap_shift);
>
> +
> +/*
> + * Pin a set of guest PFNs and return their associated host PFNs for local
> + * domain only.
> + * @dev [in] : device
> + * @user_pfn [in]: array of user/guest PFNs to be pinned. Number of user/guest
> + * PFNs should not be greater than PAGE_SIZE.
> + * @npage [in] :count of elements in array. This count should not be greater
> + * than PAGE_SIZE.
> + * @prot [in] : protection flags
> + * @phys_pfn[out] : array of host PFNs
> + * Return error or number of pages pinned.
> + */
> +int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage,
> + int prot, unsigned long *phys_pfn)
> +{
> + struct vfio_container *container;
> + struct vfio_group *group;
> + struct vfio_iommu_driver *driver;
> + int ret;
> +
> + if (!dev || !user_pfn || !phys_pfn || !npage)
> + return -EINVAL;
> +
> + if (npage >= PAGE_SIZE)
> + return -E2BIG;

This misses the point of using PAGE_SIZE. The concern is that
previously we were allowing (nearly) arbitrarily large arrays to be
passed around. The agreement, as I understood it, was that the
array itself would be sized up to a maximum of PAGE_SIZE, which means
the number of entries cannot exceed PAGE_SIZE/sizeof(*user_pfn) (i.e.
512 on x86_64). I also suggested that we should have a #define for this so
that vendor drivers can chunk their calls into allowable sizes
if they need to, without having to guess the limit, e.g.

include/linux/vfio.h
#define VFIO_PAGE_PINNING_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))

If we wanted a simple limit to the number of entries per call, there
would be no reason to have it based on PAGE_SIZE. Thanks,

Alex

> +
> + group = vfio_group_get_from_dev(dev);
> + if (IS_ERR(group))
> + return PTR_ERR(group);
> +
> + ret = vfio_group_add_container_user(group);
> + if (ret)
> + goto err_pin_pages;
> +
> + container = group->container;
> + down_read(&container->group_lock);
> +
> + driver = container->iommu_driver;
> + if (likely(driver && driver->ops->pin_pages))
> + ret = driver->ops->pin_pages(container->iommu_data, user_pfn,
> + npage, prot, phys_pfn);
> + else
> + ret = -ENOTTY;
> +
> + up_read(&container->group_lock);
> + vfio_group_try_dissolve_container(group);
> +
> +err_pin_pages:
> + vfio_group_put(group);
> + return ret;
> +}
> +EXPORT_SYMBOL(vfio_pin_pages);
> +
> +/*
> + * Unpin a set of user/guest PFNs for local domain only, by calling the
> + * unpin_pages op of the IOMMU driver attached to the container of the
> + * device's group.
> + * @dev [in] : device
> + * @user_pfn [in]: array of user/guest PFNs to be unpinned.
> + * @npage [in] : count of elements in the array. Must be non-zero and, per
> + * the check in this function, less than PAGE_SIZE.
> + * Return: whatever the driver's unpin_pages callback returns (presumably
> + * the number of pages unpinned), or a negative errno: -EINVAL for a NULL
> + * @dev or @user_pfn or a zero @npage, -E2BIG when @npage >= PAGE_SIZE,
> + * -ENOTTY when the container has no IOMMU driver or the driver provides no
> + * unpin_pages callback. Errors from looking up the group or from adding a
> + * container user are returned unchanged.
> + */
> +int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, int npage)
> +{
> + struct vfio_container *container;
> + struct vfio_group *group;
> + struct vfio_iommu_driver *driver;
> + int ret;
> +
> + if (!dev || !user_pfn || !npage)
> + return -EINVAL;
> +
> + if (npage >= PAGE_SIZE)
> + return -E2BIG;
> +
> + group = vfio_group_get_from_dev(dev);
> + if (IS_ERR(group))
> + return PTR_ERR(group);
> +
> + ret = vfio_group_add_container_user(group);
> + if (ret)
> + goto err_unpin_pages;
> +
> + container = group->container;
> + down_read(&container->group_lock);
> +
> + driver = container->iommu_driver;
> + if (likely(driver && driver->ops->unpin_pages))
> + ret = driver->ops->unpin_pages(container->iommu_data, user_pfn,
> + npage);
> + else
> + ret = -ENOTTY;
> +
> + up_read(&container->group_lock);
> + vfio_group_try_dissolve_container(group);
> +
> +err_unpin_pages:
> + vfio_group_put(group);
> + return ret;
> +}
> +EXPORT_SYMBOL(vfio_unpin_pages);
> +
> /**
> * Module/class support
> */
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 2ba19424e4a1..9f3d58d3dfaf 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -259,8 +259,8 @@ static int vaddr_get_pfn(unsigned long vaddr, int prot, unsigned long *pfn)
> * the iommu can only map chunks of consecutive pfns anyway, so get the
> * first page and all consecutive pages with the same locking.
> */
> -static long vfio_pin_pages(unsigned long vaddr, long npage,
> - int prot, unsigned long *pfn_base)
> +static long vfio_pin_pages_remote(unsigned long vaddr, long npage,
> + int prot, unsigned long *pfn_base)
> {
> unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
> bool lock_cap = capable(CAP_IPC_LOCK);
> @@ -318,8 +318,8 @@ static long vfio_pin_pages(unsigned long vaddr, long npage,
> return i;
> }
>
> -static long vfio_unpin_pages(unsigned long pfn, long npage,
> - int prot, bool do_accounting)
> +static long vfio_unpin_pages_remote(unsigned long pfn, long npage,
> + int prot, bool do_accounting)
> {
> unsigned long unlocked = 0;
> long i;
> @@ -382,9 +382,9 @@ static void vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma)
> if (WARN_ON(!unmapped))
> break;
>
> - unlocked += vfio_unpin_pages(phys >> PAGE_SHIFT,
> - unmapped >> PAGE_SHIFT,
> - dma->prot, false);
> + unlocked += vfio_unpin_pages_remote(phys >> PAGE_SHIFT,
> + unmapped >> PAGE_SHIFT,
> + dma->prot, false);
> iova += unmapped;
>
> cond_resched();
> @@ -613,8 +613,8 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
>
> while (size) {
> /* Pin a contiguous chunk of memory */
> - npage = vfio_pin_pages(vaddr + dma->size,
> - size >> PAGE_SHIFT, prot, &pfn);
> + npage = vfio_pin_pages_remote(vaddr + dma->size,
> + size >> PAGE_SHIFT, prot, &pfn);
> if (npage <= 0) {
> WARN_ON(!npage);
> ret = (int)npage;
> @@ -624,7 +624,7 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
> /* Map it! */
> ret = vfio_iommu_map(iommu, iova + dma->size, pfn, npage, prot);
> if (ret) {
> - vfio_unpin_pages(pfn, npage, prot, true);
> + vfio_unpin_pages_remote(pfn, npage, prot, true);
> break;
> }
>
> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> index 0ecae0b1cd34..86f507d0f585 100644
> --- a/include/linux/vfio.h
> +++ b/include/linux/vfio.h
> @@ -75,7 +75,11 @@ struct vfio_iommu_driver_ops {
> struct iommu_group *group);
> void (*detach_group)(void *iommu_data,
> struct iommu_group *group);
> -
> + int (*pin_pages)(void *iommu_data, unsigned long *user_pfn,
> + int npage, int prot,
> + unsigned long *phys_pfn);
> + int (*unpin_pages)(void *iommu_data,
> + unsigned long *user_pfn, int npage);
> };
>
> extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops);
> @@ -127,6 +131,12 @@ static inline long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
> }
> #endif /* CONFIG_EEH */
>
> +extern int vfio_pin_pages(struct device *dev, unsigned long *user_pfn,
> + int npage, int prot, unsigned long *phys_pfn);
> +
> +extern int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn,
> + int npage);
> +
> /*
> * IRQfd - generic
> */