Re: [PATCH v7 1/6] vfio: refactor vfio_pci_mmap_huge_fault function

From: Alex Williamson

Date: Wed Nov 26 2025 - 10:24:15 EST


On Wed, 26 Nov 2025 05:26:22 +0000
<ankita@xxxxxxxxxx> wrote:

> From: Ankit Agrawal <ankita@xxxxxxxxxx>
>
> Refactor vfio_pci_mmap_huge_fault to take out the implementation
> to map the VMA to the PTE/PMD/PUD as a separate function.
>
> Export the new function to be used by nvgrace-gpu module.
>
> No functional change is intended.
>
> Cc: Shameer Kolothum <skolothumtho@xxxxxxxxxx>
> Cc: Alex Williamson <alex@xxxxxxxxxxx>
> Cc: Jason Gunthorpe <jgg@xxxxxxxx>
> Reviewed-by: Shameer Kolothum <skolothumtho@xxxxxxxxxx>
> Signed-off-by: Ankit Agrawal <ankita@xxxxxxxxxx>
> ---
> drivers/vfio/pci/vfio_pci_core.c | 54 +++++++++++++++++---------------
> include/linux/vfio_pci_core.h | 16 ++++++++++
> 2 files changed, 45 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
> index 7dcf5439dedc..52e3a10d776b 100644
> --- a/drivers/vfio/pci/vfio_pci_core.c
> +++ b/drivers/vfio/pci/vfio_pci_core.c
> @@ -1640,48 +1640,52 @@ static unsigned long vma_to_pfn(struct vm_area_struct *vma)
> return (pci_resource_start(vdev->pdev, index) >> PAGE_SHIFT) + pgoff;
> }
>
> -static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf,
> - unsigned int order)
> +vm_fault_t vfio_pci_vmf_insert_pfn(struct vfio_pci_core_device *vdev,
> + struct vm_fault *vmf,
> + unsigned long pfn,
> + unsigned int order)
> {
> - struct vm_area_struct *vma = vmf->vma;
> - struct vfio_pci_core_device *vdev = vma->vm_private_data;
> - unsigned long addr = vmf->address & ~((PAGE_SIZE << order) - 1);
> - unsigned long pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
> - unsigned long pfn = vma_to_pfn(vma) + pgoff;
> - vm_fault_t ret = VM_FAULT_SIGBUS;
> -
> - if (order && (addr < vma->vm_start ||
> - addr + (PAGE_SIZE << order) > vma->vm_end ||
> - pfn & ((1 << order) - 1))) {
> - ret = VM_FAULT_FALLBACK;
> - goto out;
> - }
> -
> - down_read(&vdev->memory_lock);
> + lockdep_assert_held_read(&vdev->memory_lock);
>
> if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev))
> - goto out_unlock;
> + return VM_FAULT_SIGBUS;
>
> switch (order) {
> case 0:
> - ret = vmf_insert_pfn(vma, vmf->address, pfn);
> - break;
> + return vmf_insert_pfn(vmf->vma, vmf->address, pfn);
> #ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
> case PMD_ORDER:
> - ret = vmf_insert_pfn_pmd(vmf, pfn, false);
> - break;
> + return vmf_insert_pfn_pmd(vmf, pfn, false);
> #endif
> #ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP
> case PUD_ORDER:
> - ret = vmf_insert_pfn_pud(vmf, pfn, false);
> + return vmf_insert_pfn_pud(vmf, pfn, false);
> break;
> #endif
> default:
> + return VM_FAULT_FALLBACK;
> + }
> +}
> +EXPORT_SYMBOL_GPL(vfio_pci_vmf_insert_pfn);
> +
> +static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf,
> + unsigned int order)
> +{
> + struct vm_area_struct *vma = vmf->vma;
> + struct vfio_pci_core_device *vdev = vma->vm_private_data;
> + unsigned long addr = vmf->address & ~((PAGE_SIZE << order) - 1);
> + unsigned long pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
> + unsigned long pfn = vma_to_pfn(vma) + pgoff;
> + vm_fault_t ret;
> +
> + if (unmappable_for_order(vma, addr, pfn, order)) {
> ret = VM_FAULT_FALLBACK;
> + goto out;
> }
>
> -out_unlock:
> - up_read(&vdev->memory_lock);
> + scoped_guard(rwsem_read, &vdev->memory_lock)
> + ret = vfio_pci_vmf_insert_pfn(vdev, vmf, pfn, order);
> +
> out:

We really don't need a goto to jump over this tiny section of code.
With the naming/polarity change suggested below, this can just be:

	vm_fault_t ret = VM_FAULT_FALLBACK;

	if (is_aligned_for_order(vma, addr, pfn, order)) {
		scoped_guard(rwsem_read, &vdev->memory_lock)
			ret = vfio_pci_vmf_insert_pfn(vdev, vmf, pfn, order);
	}


> dev_dbg_ratelimited(&vdev->pdev->dev,
> "%s(,order = %d) BAR %ld page offset 0x%lx: 0x%x\n",
> diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
> index f541044e42a2..1d457216ce4d 100644
> --- a/include/linux/vfio_pci_core.h
> +++ b/include/linux/vfio_pci_core.h
> @@ -119,6 +119,9 @@ ssize_t vfio_pci_core_read(struct vfio_device *core_vdev, char __user *buf,
> size_t count, loff_t *ppos);
> ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *buf,
> size_t count, loff_t *ppos);
> +vm_fault_t vfio_pci_vmf_insert_pfn(struct vfio_pci_core_device *vdev,
> + struct vm_fault *vmf, unsigned long pfn,
> + unsigned int order);
> int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma);
> void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count);
> int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf);
> @@ -161,4 +164,17 @@ VFIO_IOREAD_DECLARATION(32)
> VFIO_IOREAD_DECLARATION(64)
> #endif
>
> +static inline bool unmappable_for_order(struct vm_area_struct *vma,
> + unsigned long addr,
> + unsigned long pfn,
> + unsigned int order)
> +{
> + if (order && (addr < vma->vm_start ||
> + addr + (PAGE_SIZE << order) > vma->vm_end ||
> + !IS_ALIGNED(pfn, 1 << order)))
> + return true;
> +
> + return false;
> +}


Could we flip the polarity and rename this to is_aligned_for_order()?
That also removes the need for the branched return; the body becomes:

	return !(order && (addr < vma->vm_start ||
			   addr + (PAGE_SIZE << order) > vma->vm_end ||
			   !IS_ALIGNED(pfn, 1 << order)));

Please also describe the addition of this helper in the commit log. Thanks,

Alex