Re: [RFC v2 PATCH 06/10] vfio/pci: Remove vfio_pci_zap_bars()

From: Christian König

Date: Fri Mar 13 2026 - 05:15:33 EST


On 3/12/26 19:46, Matt Evans wrote:
> vfio_pci_zap_bars() and the wrapper
> vfio_pci_zap_and_down_write_memory_lock() are redundant as of
> "vfio/pci: Convert BAR mmap() to use a DMABUF". The DMABUFs used for
> BAR mappings already zap PTEs via the existing
> vfio_pci_dma_buf_move(), which notifies changes to the BAR space
> (e.g. around reset).
>
> Remove the old functions, and the various points needing to zap BARs
> become slightly cleaner.

Not a full review, but it looks like you now take the DMA-buf reservation lock while holding vdev->memory_lock.

I strongly recommend enabling lockdep while testing that, just to be on the safe side that all locks are taken in a consistent order.

Regards,
Christian.

>
> Signed-off-by: Matt Evans <mattev@xxxxxxxx>
> ---
> drivers/vfio/pci/vfio_pci_config.c | 18 ++++++------------
> drivers/vfio/pci/vfio_pci_core.c | 30 +++++++-----------------------
> drivers/vfio/pci/vfio_pci_priv.h | 1 -
> 3 files changed, 13 insertions(+), 36 deletions(-)
>
> diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
> index b4e39253f98d..c7ed28be1104 100644
> --- a/drivers/vfio/pci/vfio_pci_config.c
> +++ b/drivers/vfio/pci/vfio_pci_config.c
> @@ -590,12 +590,9 @@ static int vfio_basic_config_write(struct vfio_pci_core_device *vdev, int pos,
> virt_mem = !!(le16_to_cpu(*virt_cmd) & PCI_COMMAND_MEMORY);
> new_mem = !!(new_cmd & PCI_COMMAND_MEMORY);
>
> - if (!new_mem) {
> - vfio_pci_zap_and_down_write_memory_lock(vdev);
> + down_write(&vdev->memory_lock);
> + if (!new_mem)
> vfio_pci_dma_buf_move(vdev, true);
> - } else {
> - down_write(&vdev->memory_lock);
> - }
>
> /*
> * If the user is writing mem/io enable (new_mem/io) and we
> @@ -712,12 +709,9 @@ static int __init init_pci_cap_basic_perm(struct perm_bits *perm)
> static void vfio_lock_and_set_power_state(struct vfio_pci_core_device *vdev,
> pci_power_t state)
> {
> - if (state >= PCI_D3hot) {
> - vfio_pci_zap_and_down_write_memory_lock(vdev);
> + down_write(&vdev->memory_lock);
> + if (state >= PCI_D3hot)
> vfio_pci_dma_buf_move(vdev, true);
> - } else {
> - down_write(&vdev->memory_lock);
> - }
>
> vfio_pci_set_power_state(vdev, state);
> if (__vfio_pci_memory_enabled(vdev))
> @@ -908,7 +902,7 @@ static int vfio_exp_config_write(struct vfio_pci_core_device *vdev, int pos,
> &cap);
>
> if (!ret && (cap & PCI_EXP_DEVCAP_FLR)) {
> - vfio_pci_zap_and_down_write_memory_lock(vdev);
> + down_write(&vdev->memory_lock);
> vfio_pci_dma_buf_move(vdev, true);
> pci_try_reset_function(vdev->pdev);
> if (__vfio_pci_memory_enabled(vdev))
> @@ -993,7 +987,7 @@ static int vfio_af_config_write(struct vfio_pci_core_device *vdev, int pos,
> &cap);
>
> if (!ret && (cap & PCI_AF_CAP_FLR) && (cap & PCI_AF_CAP_TP)) {
> - vfio_pci_zap_and_down_write_memory_lock(vdev);
> + down_write(&vdev->memory_lock);
> vfio_pci_dma_buf_move(vdev, true);
> pci_try_reset_function(vdev->pdev);
> if (__vfio_pci_memory_enabled(vdev))
> diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
> index 41224efa58d8..9e9ad97c2f7f 100644
> --- a/drivers/vfio/pci/vfio_pci_core.c
> +++ b/drivers/vfio/pci/vfio_pci_core.c
> @@ -319,7 +319,7 @@ static int vfio_pci_runtime_pm_entry(struct vfio_pci_core_device *vdev,
> * The vdev power related flags are protected with 'memory_lock'
> * semaphore.
> */
> - vfio_pci_zap_and_down_write_memory_lock(vdev);
> + down_write(&vdev->memory_lock);
> vfio_pci_dma_buf_move(vdev, true);
>
> if (vdev->pm_runtime_engaged) {
> @@ -1229,7 +1229,7 @@ static int vfio_pci_ioctl_reset(struct vfio_pci_core_device *vdev,
> if (!vdev->reset_works)
> return -EINVAL;
>
> - vfio_pci_zap_and_down_write_memory_lock(vdev);
> + down_write(&vdev->memory_lock);
>
> /*
> * This function can be invoked while the power state is non-D0. If
> @@ -1613,22 +1613,6 @@ ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *bu
> }
> EXPORT_SYMBOL_GPL(vfio_pci_core_write);
>
> -static void vfio_pci_zap_bars(struct vfio_pci_core_device *vdev)
> -{
> - struct vfio_device *core_vdev = &vdev->vdev;
> - loff_t start = VFIO_PCI_INDEX_TO_OFFSET(VFIO_PCI_BAR0_REGION_INDEX);
> - loff_t end = VFIO_PCI_INDEX_TO_OFFSET(VFIO_PCI_ROM_REGION_INDEX);
> - loff_t len = end - start;
> -
> - unmap_mapping_range(core_vdev->inode->i_mapping, start, len, true);
> -}
> -
> -void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_core_device *vdev)
> -{
> - down_write(&vdev->memory_lock);
> - vfio_pci_zap_bars(vdev);
> -}
> -
> u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_core_device *vdev)
> {
> u16 cmd;
> @@ -2487,10 +2471,11 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
> }
>
> /*
> - * Take the memory write lock for each device and zap BAR
> - * mappings to prevent the user accessing the device while in
> - * reset. Locking multiple devices is prone to deadlock,
> - * runaway and unwind if we hit contention.
> + * Take the memory write lock for each device and
> + * revoke all DMABUFs, which will prevent any access
> + * to the device while in reset. Locking multiple
> + * devices is prone to deadlock, runaway and unwind if
> + * we hit contention.
> */
> if (!down_write_trylock(&vdev->memory_lock)) {
> ret = -EBUSY;
> @@ -2498,7 +2483,6 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
> }
>
> vfio_pci_dma_buf_move(vdev, true);
> - vfio_pci_zap_bars(vdev);
> }
>
> if (!list_entry_is_head(vdev,
> diff --git a/drivers/vfio/pci/vfio_pci_priv.h b/drivers/vfio/pci/vfio_pci_priv.h
> index 37ece9b4b5bd..e201c96bbb14 100644
> --- a/drivers/vfio/pci/vfio_pci_priv.h
> +++ b/drivers/vfio/pci/vfio_pci_priv.h
> @@ -78,7 +78,6 @@ void vfio_config_free(struct vfio_pci_core_device *vdev);
> int vfio_pci_set_power_state(struct vfio_pci_core_device *vdev,
> pci_power_t state);
>
> -void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_core_device *vdev);
> u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_core_device *vdev);
> void vfio_pci_memory_unlock_and_restore(struct vfio_pci_core_device *vdev,
> u16 cmd);