Re: [PATCH 3/5] vfio/type1: Use vfio_batch for vaddr_get_pfns()
From: Mitchell Augustin
Date: Thu Feb 06 2025 - 20:39:17 EST
Reviewed-by: "Mitchell Augustin" <mitchell.augustin@xxxxxxxxxxxxx>
Tested-by: "Mitchell Augustin" <mitchell.augustin@xxxxxxxxxxxxx>
On Wed, Feb 5, 2025 at 5:18 PM Alex Williamson
<alex.williamson@xxxxxxxxxx> wrote:
>
> Passing the vfio_batch to vaddr_get_pfns() allows for greater
> distinction between page backed pfns and pfnmaps. In the case of page
> backed pfns, vfio_batch.size is set to a positive value matching the
> number of pages filled in vfio_batch.pages. For a pfnmap,
> vfio_batch.size remains zero as vfio_batch.pages are not used. In both
> cases the return value continues to indicate the number of pfns and the
> provided pfn arg is set to the initial pfn value.
>
> This allows us to shortcut the pfnmap case, which is detected by the
> zero vfio_batch.size. pfnmaps do not contribute to locked memory
> accounting, so we can update counters and continue directly,
> which also enables a future where vaddr_get_pfns() can return a value
> greater than one for consecutive pfnmaps.
>
> NB. Now that we're not guessing whether the initial pfn is page backed
> or pfnmap, we no longer need to special case the put_pfn() and batch
> size reset. It's safe for vfio_batch_unpin() to handle this case.
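
Agreed - this contract is easy to reason about. Restating my
understanding of the caller's view (a hypothetical sketch based on the
description above, assuming mm/vaddr/npage/dma are in scope as in
vfio_pin_pages_remote(); not code from the series):

        struct vfio_batch batch;
        unsigned long pfn;
        int ret;

        vfio_batch_init(&batch);
        ret = vaddr_get_pfns(mm, vaddr, npage, dma->prot, &pfn, &batch);
        if (ret < 0) {
                /* error, nothing pinned */
        } else if (batch.size) {
                /* page backed: ret pages in batch.pages[], first pfn in pfn */
        } else {
                /* pfnmap: ret contiguous pfns starting at pfn, no pages */
        }
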
>
> Signed-off-by: Alex Williamson <alex.williamson@xxxxxxxxxx>
> ---
> drivers/vfio/vfio_iommu_type1.c | 62 ++++++++++++++++++---------------
> 1 file changed, 34 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 2e95f5f4d881..939920454da7 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -555,12 +555,16 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
>
> /*
> * Returns the positive number of pfns successfully obtained or a negative
> - * error code.
> + * error code. The initial pfn is stored in the pfn arg. For page-backed
> + * pfns, the provided batch is also updated to indicate the filled pages and
> + * initial offset. For VM_PFNMAP pfns, only the returned number of pfns and
> + * returned initial pfn are provided; subsequent pfns are contiguous.
> */
> static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr,
> long npages, int prot, unsigned long *pfn,
> - struct page **pages)
> + struct vfio_batch *batch)
> {
> + long pin_pages = min_t(long, npages, batch->capacity);
> struct vm_area_struct *vma;
> unsigned int flags = 0;
> int ret;
> @@ -569,10 +573,12 @@ static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr,
> flags |= FOLL_WRITE;
>
> mmap_read_lock(mm);
> - ret = pin_user_pages_remote(mm, vaddr, npages, flags | FOLL_LONGTERM,
> - pages, NULL);
> + ret = pin_user_pages_remote(mm, vaddr, pin_pages, flags | FOLL_LONGTERM,
> + batch->pages, NULL);
> if (ret > 0) {
> - *pfn = page_to_pfn(pages[0]);
> + *pfn = page_to_pfn(batch->pages[0]);
> + batch->size = ret;
> + batch->offset = 0;
> goto done;
> } else if (!ret) {
> ret = -EFAULT;
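
If I'm remembering the batch sizing right (capacity is normally
PAGE_SIZE / sizeof(struct page *), i.e. 512 entries with 4K pages -
correct me if that's changed), folding the min_t() cap in here just
means a large request gets satisfied across refills, e.g.:

        /* illustrative numbers, assuming 4K pages, 512-entry batch */
        npage     = 1024;                    /* caller asks for 4MB */
        pin_pages = min_t(long, 1024, 512);  /* -> 512              */
        /*
         * pin_user_pages_remote() fills batch->pages with up to 512
         * pages; the outer loop in vfio_pin_pages_remote() consumes
         * them and calls vaddr_get_pfns() again for the rest.
         */

No functional change from the old caller-side capping, as far as I can
tell.
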
> @@ -628,32 +634,41 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
> *pfn_base = 0;
> }
>
> + if (unlikely(disable_hugepages))
> + npage = 1;
> +
> while (npage) {
> if (!batch->size) {
> /* Empty batch, so refill it. */
> - long req_pages = min_t(long, npage, batch->capacity);
> -
> - ret = vaddr_get_pfns(mm, vaddr, req_pages, dma->prot,
> - &pfn, batch->pages);
> + ret = vaddr_get_pfns(mm, vaddr, npage, dma->prot,
> + &pfn, batch);
> if (ret < 0)
> goto unpin_out;
>
> - batch->size = ret;
> - batch->offset = 0;
> -
> if (!*pfn_base) {
> *pfn_base = pfn;
> rsvd = is_invalid_reserved_pfn(*pfn_base);
> }
> +
> + /* Handle pfnmap */
> + if (!batch->size) {
> + if (pfn != *pfn_base + pinned || !rsvd)
> + goto out;
> +
> + pinned += ret;
> + npage -= ret;
> + vaddr += (PAGE_SIZE * ret);
> + iova += (PAGE_SIZE * ret);
> + continue;
> + }
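
The short-circuit reads correctly to me. Today ret is always 1 for a
pfnmap, but the bookkeeping already copes with a longer run, e.g.
(made-up values, assuming the future multi-pfn return mentioned in the
commit message):

        /* *pfn_base == 0x100000, pinned == 0, rsvd == true          */
        /* refill returns ret = 4, pfn = 0x100000, batch->size == 0  */
        /* pfn == *pfn_base + pinned and rsvd, so keep going:        */
        pinned += 4;                 /* pfns mapped, but not locked  */
        npage  -= 4;
        vaddr  += PAGE_SIZE * 4;
        iova   += PAGE_SIZE * 4;
        /* the next refill must start at pfn 0x100004 to extend this */
        /* run; otherwise the contiguity check takes the goto out    */
        /* path and this pinning pass ends.                          */
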
> }
>
> /*
> - * pfn is preset for the first iteration of this inner loop and
> - * updated at the end to handle a VM_PFNMAP pfn. In that case,
> - * batch->pages isn't valid (there's no struct page), so allow
> - * batch->pages to be touched only when there's more than one
> - * pfn to check, which guarantees the pfns are from a
> - * !VM_PFNMAP vma.
> + * pfn is preset for the first iteration of this inner loop due to the
> + * fact that vaddr_get_pfns() needs to provide the initial pfn for pfnmaps.
> + * Therefore to reduce redundancy, the next pfn is fetched at the end of
> + * the loop. A PageReserved() page could still qualify as page backed and
> + * rsvd here, and therefore continues to use the batch.
> */
> while (true) {
> if (pfn != *pfn_base + pinned ||
> @@ -688,21 +703,12 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
>
> pfn = page_to_pfn(batch->pages[batch->offset]);
> }
> -
> - if (unlikely(disable_hugepages))
> - break;
> }
>
> out:
> ret = vfio_lock_acct(dma, lock_acct, false);
>
> unpin_out:
> - if (batch->size == 1 && !batch->offset) {
> - /* May be a VM_PFNMAP pfn, which the batch can't remember. */
> - put_pfn(pfn, dma->prot);
> - batch->size = 0;
> - }
> -
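
Nice cleanup. My understanding of why dropping this is safe: the batch
never claims a pfnmap entry now (batch->size stays 0), so the unpin
helper has nothing to release. Roughly (paraphrasing vfio_batch_unpin()
from memory rather than quoting it):

        /* no-op whenever batch->size == 0, i.e. the pfnmap case */
        while (batch->size) {
                put_pfn(page_to_pfn(batch->pages[batch->offset]),
                        dma->prot);
                batch->offset++;
                batch->size--;
        }

so the old "can't remember a VM_PFNMAP pfn" workaround is no longer
needed.
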
> if (ret < 0) {
> if (pinned && !rsvd) {
> for (pfn = *pfn_base ; pinned ; pfn++, pinned--)
> @@ -750,7 +756,7 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
>
> vfio_batch_init_single(&batch);
>
> - ret = vaddr_get_pfns(mm, vaddr, 1, dma->prot, pfn_base, batch.pages);
> + ret = vaddr_get_pfns(mm, vaddr, 1, dma->prot, pfn_base, &batch);
> if (ret != 1)
> goto out;
>
> --
> 2.47.1
>
--
Mitchell Augustin
Software Engineer - Ubuntu Partner Engineering