Re: [PATCH v4 3/3] vfio/pci: Replace vfio_pci_core_setup_barmap() with vfio_pci_core_get_iomap()

From: Alex Williamson

Date: Thu May 07 2026 - 18:21:51 EST


On Tue, 5 May 2026 10:38:31 -0700
Matt Evans <mattev@xxxxxxxx> wrote:

> Since "vfio/pci: Set up barmap in vfio_pci_core_enable()", the
> resource request and iomap for the BARs was performed early, and
> vfio_pci_core_setup_barmap() just checks those actions succeeded.
>
> Move this logic to a new helper that checks success and returns the
> iomap address, replacing the various bare vdev->barmap[] lookups.
> This maintains the error behaviour of the previous on-demand
> vfio_pci_core_setup_barmap() scheme.
>
> Signed-off-by: Matt Evans <mattev@xxxxxxxx>
> ---
> drivers/vfio/pci/nvgrace-gpu/main.c | 11 ++++-------
> drivers/vfio/pci/vfio_pci_core.c | 11 +++++------
> drivers/vfio/pci/vfio_pci_dmabuf.c | 2 +-
> drivers/vfio/pci/vfio_pci_rdwr.c | 30 ++++++++---------------------
> drivers/vfio/pci/virtio/legacy_io.c | 13 ++++++-------
> include/linux/vfio_pci_core.h | 20 ++++++++++++++++++-
> 6 files changed, 43 insertions(+), 44 deletions(-)
>
> diff --git a/drivers/vfio/pci/nvgrace-gpu/main.c b/drivers/vfio/pci/nvgrace-gpu/main.c
> index fa056b69f899..e153002258ce 100644
> --- a/drivers/vfio/pci/nvgrace-gpu/main.c
> +++ b/drivers/vfio/pci/nvgrace-gpu/main.c
> @@ -184,13 +184,10 @@ static int nvgrace_gpu_open_device(struct vfio_device *core_vdev)
>
> /*
> * GPU readiness is checked by reading the BAR0 registers.
> - *
> - * ioremap BAR0 to ensure that the BAR0 mapping is present before
> - * register reads on first fault before establishing any GPU
> - * memory mapping.
> + * The BAR map was just set up by vfio_pci_core_enable(), so
> + * check that was successful and bail early if not:
> */
> - ret = vfio_pci_core_setup_barmap(vdev, 0);
> - if (ret)
> + if (IS_ERR(vfio_pci_core_get_iomap(vdev, 0)))
> goto error_exit;

Sashiko notes we're not setting ret here. The bots are also paranoid
about the unreachable condition that the get_iomap below could return an
ERR_PTR. Maybe head off both by adding an __iomem pointer to the
nvgrace_gpu_pci_core_device struct and a temporary one here. Store the
iomap in the temporary variable, use it to test for IS_ERR() and
PTR_ERR(), then set the pointer in the structure after the last error
condition here. Add one line in the close_device to set it NULL. Then
just use nvdev->bar0_io below.

>
> if (nvdev->resmem.memlength) {
> @@ -275,7 +272,7 @@ nvgrace_gpu_check_device_ready(struct nvgrace_gpu_pci_core_device *nvdev)
> if (!__vfio_pci_memory_enabled(vdev))
> return -EIO;
>
> - ret = nvgrace_gpu_wait_device_ready(vdev->barmap[0]);
> + ret = nvgrace_gpu_wait_device_ready(vfio_pci_core_get_iomap(vdev, 0));
> if (ret)
> return ret;
>
> diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
> index 62931dc381d8..5c8bd13f10d0 100644
> --- a/drivers/vfio/pci/vfio_pci_core.c
> +++ b/drivers/vfio/pci/vfio_pci_core.c
> @@ -1761,7 +1761,7 @@ int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma
> struct pci_dev *pdev = vdev->pdev;
> unsigned int index;
> u64 phys_len, req_len, pgoff, req_start;
> - int ret;
> + void __iomem *bar_io;
>
> index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
>
> @@ -1795,12 +1795,11 @@ int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma
> return -EINVAL;
>
> /*
> - * Even though we don't make use of the barmap for the mmap,
> - * we need to request the region and the barmap tracks that.
> + * Ensure the BAR resource region is reserved for use.
> */
> - ret = vfio_pci_core_setup_barmap(vdev, index);
> - if (ret)
> - return ret;
> + bar_io = vfio_pci_core_get_iomap(vdev, index);
> + if (IS_ERR(bar_io))
> + return PTR_ERR(bar_io);
>
> vma->vm_private_data = vdev;
> vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
> diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c
> index 69a5c2d511e6..46cd44b22c9c 100644
> --- a/drivers/vfio/pci/vfio_pci_dmabuf.c
> +++ b/drivers/vfio/pci/vfio_pci_dmabuf.c
> @@ -248,7 +248,7 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
> * else. Check that PCI resources have been claimed for it.
> */
> if (get_dma_buf.region_index >= VFIO_PCI_ROM_REGION_INDEX ||
> - vfio_pci_core_setup_barmap(vdev, get_dma_buf.region_index))
> + IS_ERR(vfio_pci_core_get_iomap(vdev, get_dma_buf.region_index)))
> return -ENODEV;
>
> dma_ranges = memdup_array_user(&arg->dma_ranges, get_dma_buf.nr_ranges,
> diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c
> index 3bfbb879a005..7f14dd46de17 100644
> --- a/drivers/vfio/pci/vfio_pci_rdwr.c
> +++ b/drivers/vfio/pci/vfio_pci_rdwr.c
> @@ -198,19 +198,6 @@ ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
> }
> EXPORT_SYMBOL_GPL(vfio_pci_core_do_io_rw);
>
> -/*
> - * The barmap is set up in vfio_pci_core_enable(). Callers use this
> - * function to check that the BAR resources are requested or that the
> - * pci_iomap() was done.
> - */
> -int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
> -{
> - if (IS_ERR(vdev->barmap[bar]))
> - return PTR_ERR(vdev->barmap[bar]);
> - return 0;
> -}
> -EXPORT_SYMBOL_GPL(vfio_pci_core_setup_barmap);
> -
> ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
> size_t count, loff_t *ppos, bool iswrite)
> {
> @@ -262,13 +249,11 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
> */
> max_width = VFIO_PCI_IO_WIDTH_4;
> } else {
> - int ret = vfio_pci_core_setup_barmap(vdev, bar);
> - if (ret) {
> - done = ret;
> + io = vfio_pci_core_get_iomap(vdev, bar);
> + if (IS_ERR(io)) {
> + done = PTR_ERR(io);
> goto out;
> }
> -
> - io = vdev->barmap[bar];
> }
>
> if (bar == vdev->msix_bar) {
> @@ -423,6 +408,7 @@ int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
> loff_t pos = offset & VFIO_PCI_OFFSET_MASK;
> int ret, bar = VFIO_PCI_OFFSET_TO_INDEX(offset);
> struct vfio_pci_ioeventfd *ioeventfd;
> + void __iomem *io;
>
> /* Only support ioeventfds into BARs */
> if (bar > VFIO_PCI_BAR5_REGION_INDEX)
> @@ -440,9 +426,9 @@ int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
> if (count == 8)
> return -EINVAL;
>
> - ret = vfio_pci_core_setup_barmap(vdev, bar);
> - if (ret)
> - return ret;
> + io = vfio_pci_core_get_iomap(vdev, bar);
> + if (IS_ERR(io))
> + return PTR_ERR(io);

Sashiko seems to note a real existing error here that should also be
pulled out to a separate fix. Given the right offset, this could
generate a negative BAR value. The test at the end of the previous
chunk should should be expanded to `if (bar < 0 || bar > ...BAR5...)`.
Do you want to pick that up in this series? I think it's the only case
that lets that slip through. Thanks,

Alex