Re: [PATCH v12 05/22] vfio iommu: Added pin and unpin callback functions to vfio_iommu_driver_ops

From: Kirti Wankhede
Date: Tue Nov 15 2016 - 10:04:33 EST




On 11/15/2016 1:13 AM, Alex Williamson wrote:
> On Mon, 14 Nov 2016 21:12:19 +0530
> Kirti Wankhede <kwankhede@xxxxxxxxxx> wrote:
>
>> Added APIs for pinning and unpinning a set of pages. These call back into
>> the backend iommu module to actually pin and unpin pages.
>> Added two new callback functions to struct vfio_iommu_driver_ops. A backend
>> IOMMU module that supports pinning and unpinning pages for mdev devices
>> should provide these functions.
>>
>> Renamed static functions in vfio_type1_iommu.c to resolve conflicts
>>
>> Signed-off-by: Kirti Wankhede <kwankhede@xxxxxxxxxx>
>> Signed-off-by: Neo Jia <cjia@xxxxxxxxxx>
>> Change-Id: Ia7417723aaae86bec2959ad9ae6c2915ddd340e0
>> ---
>> drivers/vfio/vfio.c | 103 ++++++++++++++++++++++++++++++++++++++++
>> drivers/vfio/vfio_iommu_type1.c | 20 ++++----
>> include/linux/vfio.h | 12 ++++-
>> 3 files changed, 124 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
>> index 2e83bdf007fe..7dcfbca2016a 100644
>> --- a/drivers/vfio/vfio.c
>> +++ b/drivers/vfio/vfio.c
>> @@ -1799,6 +1799,109 @@ void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
>> }
>> EXPORT_SYMBOL_GPL(vfio_info_cap_shift);
>>
>> +
>> +/*
>> + * Pin a set of guest PFNs and return their associated host PFNs for local
>> + * domain only.
>> + * @dev [in] : device
>> + * @user_pfn [in]: array of user/guest PFNs to be pinned. Number of user/guest
>> + * PFNs should not be greater than PAGE_SIZE.
>> + * @npage [in] :count of elements in array. This count should not be greater
>> + * than PAGE_SIZE.
>> + * @prot [in] : protection flags
>> + * @phys_pfn[out] : array of host PFNs
>> + * Return error or number of pages pinned.
>> + */
>> +int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage,
>> + int prot, unsigned long *phys_pfn)
>> +{
>> + struct vfio_container *container;
>> + struct vfio_group *group;
>> + struct vfio_iommu_driver *driver;
>> + int ret;
>> +
>> + if (!dev || !user_pfn || !phys_pfn || !npage)
>> + return -EINVAL;
>> +
>> + if (npage >= PAGE_SIZE)
>> + return -E2BIG;
>
> This misses the point of using PAGE_SIZE. The concern is that
> previously we were allowing (nearly) arbitrarily large arrays to be
> passed around. The agreement as I understood it would be that the
> array itself would be sized up to a maximum of PAGE_SIZE, which means
> the number of entries cannot exceed PAGE_SIZE/sizeof(*user_pfn) (i.e.
> 512 on x86). I also suggested that we should have a #define for this so
> that vendor drivers can actually chunk their calls into allowable sizes
> if they need to and not need to guess the limit, ex.
>
> include/linux/vfio.h
> #define VFIO_PAGE_PINNING_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
>
> If we wanted a simple limit to the number of entries per call, there
> would be no reason to have it based on PAGE_SIZE. Thanks,
>

Sorry, I misinterpreted it. I will update it in v13.
Thanks,
Kirti

> Alex
>
>> +
>> + group = vfio_group_get_from_dev(dev);
>> + if (IS_ERR(group))
>> + return PTR_ERR(group);
>> +
>> + ret = vfio_group_add_container_user(group);
>> + if (ret)
>> + goto err_pin_pages;
>> +
>> + container = group->container;
>> + down_read(&container->group_lock);
>> +
>> + driver = container->iommu_driver;
>> + if (likely(driver && driver->ops->pin_pages))
>> + ret = driver->ops->pin_pages(container->iommu_data, user_pfn,
>> + npage, prot, phys_pfn);
>> + else
>> + ret = -ENOTTY;
>> +
>> + up_read(&container->group_lock);
>> + vfio_group_try_dissolve_container(group);
>> +
>> +err_pin_pages:
>> + vfio_group_put(group);
>> + return ret;
>> +}
>> +EXPORT_SYMBOL(vfio_pin_pages);
>> +
>> +/*
>> + * Unpin set of host PFNs for local domain only.
>> + * @dev [in] : device
>> + * @user_pfn [in]: array of user/guest PFNs to be unpinned. Number of user/guest
>> + * PFNs should not be greater than PAGE_SIZE.
>> + * @npage [in] :count of elements in array. This count should not be greater
>> + * than PAGE_SIZE.
>> + * Return error or number of pages unpinned.
>> + */
>> +int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, int npage)
>> +{
>> + struct vfio_container *container;
>> + struct vfio_group *group;
>> + struct vfio_iommu_driver *driver;
>> + int ret;
>> +
>> + if (!dev || !user_pfn || !npage)
>> + return -EINVAL;
>> +
>> + if (npage >= PAGE_SIZE)
>> + return -E2BIG;
>> +
>> + group = vfio_group_get_from_dev(dev);
>> + if (IS_ERR(group))
>> + return PTR_ERR(group);
>> +
>> + ret = vfio_group_add_container_user(group);
>> + if (ret)
>> + goto err_unpin_pages;
>> +
>> + container = group->container;
>> + down_read(&container->group_lock);
>> +
>> + driver = container->iommu_driver;
>> + if (likely(driver && driver->ops->unpin_pages))
>> + ret = driver->ops->unpin_pages(container->iommu_data, user_pfn,
>> + npage);
>> + else
>> + ret = -ENOTTY;
>> +
>> + up_read(&container->group_lock);
>> + vfio_group_try_dissolve_container(group);
>> +
>> +err_unpin_pages:
>> + vfio_group_put(group);
>> + return ret;
>> +}
>> +EXPORT_SYMBOL(vfio_unpin_pages);
>> +
>> /**
>> * Module/class support
>> */
>> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
>> index 2ba19424e4a1..9f3d58d3dfaf 100644
>> --- a/drivers/vfio/vfio_iommu_type1.c
>> +++ b/drivers/vfio/vfio_iommu_type1.c
>> @@ -259,8 +259,8 @@ static int vaddr_get_pfn(unsigned long vaddr, int prot, unsigned long *pfn)
>> * the iommu can only map chunks of consecutive pfns anyway, so get the
>> * first page and all consecutive pages with the same locking.
>> */
>> -static long vfio_pin_pages(unsigned long vaddr, long npage,
>> - int prot, unsigned long *pfn_base)
>> +static long vfio_pin_pages_remote(unsigned long vaddr, long npage,
>> + int prot, unsigned long *pfn_base)
>> {
>> unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
>> bool lock_cap = capable(CAP_IPC_LOCK);
>> @@ -318,8 +318,8 @@ static long vfio_pin_pages(unsigned long vaddr, long npage,
>> return i;
>> }
>>
>> -static long vfio_unpin_pages(unsigned long pfn, long npage,
>> - int prot, bool do_accounting)
>> +static long vfio_unpin_pages_remote(unsigned long pfn, long npage,
>> + int prot, bool do_accounting)
>> {
>> unsigned long unlocked = 0;
>> long i;
>> @@ -382,9 +382,9 @@ static void vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma)
>> if (WARN_ON(!unmapped))
>> break;
>>
>> - unlocked += vfio_unpin_pages(phys >> PAGE_SHIFT,
>> - unmapped >> PAGE_SHIFT,
>> - dma->prot, false);
>> + unlocked += vfio_unpin_pages_remote(phys >> PAGE_SHIFT,
>> + unmapped >> PAGE_SHIFT,
>> + dma->prot, false);
>> iova += unmapped;
>>
>> cond_resched();
>> @@ -613,8 +613,8 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
>>
>> while (size) {
>> /* Pin a contiguous chunk of memory */
>> - npage = vfio_pin_pages(vaddr + dma->size,
>> - size >> PAGE_SHIFT, prot, &pfn);
>> + npage = vfio_pin_pages_remote(vaddr + dma->size,
>> + size >> PAGE_SHIFT, prot, &pfn);
>> if (npage <= 0) {
>> WARN_ON(!npage);
>> ret = (int)npage;
>> @@ -624,7 +624,7 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
>> /* Map it! */
>> ret = vfio_iommu_map(iommu, iova + dma->size, pfn, npage, prot);
>> if (ret) {
>> - vfio_unpin_pages(pfn, npage, prot, true);
>> + vfio_unpin_pages_remote(pfn, npage, prot, true);
>> break;
>> }
>>
>> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
>> index 0ecae0b1cd34..86f507d0f585 100644
>> --- a/include/linux/vfio.h
>> +++ b/include/linux/vfio.h
>> @@ -75,7 +75,11 @@ struct vfio_iommu_driver_ops {
>> struct iommu_group *group);
>> void (*detach_group)(void *iommu_data,
>> struct iommu_group *group);
>> -
>> + int (*pin_pages)(void *iommu_data, unsigned long *user_pfn,
>> + int npage, int prot,
>> + unsigned long *phys_pfn);
>> + int (*unpin_pages)(void *iommu_data,
>> + unsigned long *user_pfn, int npage);
>> };
>>
>> extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops);
>> @@ -127,6 +131,12 @@ static inline long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
>> }
>> #endif /* CONFIG_EEH */
>>
>> +extern int vfio_pin_pages(struct device *dev, unsigned long *user_pfn,
>> + int npage, int prot, unsigned long *phys_pfn);
>> +
>> +extern int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn,
>> + int npage);
>> +
>> /*
>> * IRQfd - generic
>> */
>