[PATCH v3 8/9] vfio/pci: Permanently revoke a DMABUF on request

From: Matt Evans

Date: Wed Jun 10 2026 - 12:05:23 EST


Expand the VFIO DMABUF revocation state to three states:
Not revoked, temporarily revoked, and permanently revoked.

The first two are for existing transient revocation, e.g. across a
function reset, and the DMABUF is put into the last in response to a
new VFIO feature VFIO_DEVICE_FEATURE_DMA_BUF_REVOKE.

VFIO_DEVICE_FEATURE_DMA_BUF_REVOKE passes a DMABUF by fd and requests
that the DMABUF is permanently revoked. On success, it's guaranteed
that the buffer can never be imported/attached/mmap()ed in future,
that dynamic imports have been cleanly detached, and that all mappings
have been made inaccessible/PTEs zapped.

This is useful for lifecycle management, to reclaim VFIO PCI BAR
ranges previously delegated to a subordinate client process: The
driver process can ensure that the loaned resources are revoked when
the client is deemed "done", and exported ranges can be safely re-used
elsewhere.

Refactor the revocation code out of vfio_pci_dma_buf_move() to a
function common to move and the new feature request path.

Signed-off-by: Matt Evans <matt@xxxxxxxxxx>
---
drivers/vfio/pci/vfio_pci_core.c | 6 +-
drivers/vfio/pci/vfio_pci_dmabuf.c | 169 ++++++++++++++++++++++-------
drivers/vfio/pci/vfio_pci_priv.h | 19 +++-
include/uapi/linux/vfio.h | 20 ++++
4 files changed, 173 insertions(+), 41 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 508a5eca910a..064906b25467 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -1573,6 +1573,8 @@ int vfio_pci_core_ioctl_feature(struct vfio_device *device, u32 flags,
return vfio_pci_core_feature_token(vdev, flags, arg, argsz);
case VFIO_DEVICE_FEATURE_DMA_BUF:
return vfio_pci_core_feature_dma_buf(vdev, flags, arg, argsz);
+ case VFIO_DEVICE_FEATURE_DMA_BUF_REVOKE:
+ return vfio_pci_core_feature_dma_buf_revoke(vdev, flags, arg, argsz);
default:
return -ENOTTY;
}
@@ -1784,7 +1786,7 @@ static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf,

dma_resv_lock(priv->dmabuf->resv, NULL);

- if (priv->revoked) {
+ if (priv->status != VFIO_PCI_DMABUF_OK) {
pr_debug_ratelimited("%s VA 0x%lx, pgoff 0x%lx: DMABUF revoked/cleaned up\n",
__func__, vmf->address, vma->vm_pgoff);
dma_resv_unlock(priv->dmabuf->resv);
@@ -1809,7 +1811,7 @@ static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf,

scoped_guard(rwsem_read, &vdev->memory_lock) {
/* Revocation status must be re-read, under memory_lock */
- if (!priv->revoked) {
+ if (priv->status == VFIO_PCI_DMABUF_OK) {
int pres = vfio_pci_dma_buf_find_pfn(priv, vma,
vmf->address,
order, &pfn);
diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c
index 2fb09a2c0f6b..b47411992ab6 100644
--- a/drivers/vfio/pci/vfio_pci_dmabuf.c
+++ b/drivers/vfio/pci/vfio_pci_dmabuf.c
@@ -19,7 +19,7 @@ static int vfio_pci_dma_buf_attach(struct dma_buf *dmabuf,
if (!attachment->peer2peer)
return -EOPNOTSUPP;

- if (priv->revoked)
+ if (priv->status != VFIO_PCI_DMABUF_OK)
return -ENODEV;

if (!dma_buf_attach_revocable(attachment))
@@ -41,7 +41,7 @@ static int vfio_pci_dma_buf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *
* still safe because the fault handler ultimately prevents
* access to a revoked buffer if it isn't caught here.
*/
- if (READ_ONCE(priv->revoked))
+ if (READ_ONCE(priv->status) != VFIO_PCI_DMABUF_OK)
return -ENODEV;
if ((vma->vm_flags & VM_SHARED) == 0)
return -EINVAL;
@@ -81,7 +81,7 @@ vfio_pci_dma_buf_map(struct dma_buf_attachment *attachment,

dma_resv_assert_held(priv->dmabuf->resv);

- if (priv->revoked)
+ if (priv->status != VFIO_PCI_DMABUF_OK)
return ERR_PTR(-ENODEV);

ret = dma_buf_phys_vec_to_sgt(attachment, priv->provider,
@@ -291,7 +291,8 @@ static int vfio_pci_dmabuf_export(struct vfio_pci_core_device *vdev,
INIT_LIST_HEAD(&priv->dmabufs_elm);
down_write(&vdev->memory_lock);
dma_resv_lock(priv->dmabuf->resv, NULL);
- priv->revoked = !__vfio_pci_memory_enabled(vdev);
+ priv->status = __vfio_pci_memory_enabled(vdev) ? VFIO_PCI_DMABUF_OK :
+ VFIO_PCI_DMABUF_TEMP_REVOKED;
list_add_tail(&priv->dmabufs_elm, &vdev->dmabufs);
dma_resv_unlock(priv->dmabuf->resv);
up_write(&vdev->memory_lock);
@@ -322,7 +323,7 @@ int vfio_pci_dma_buf_iommufd_map(struct dma_buf_attachment *attachment,
return -EOPNOTSUPP;

priv = attachment->dmabuf->priv;
- if (priv->revoked)
+ if (priv->status != VFIO_PCI_DMABUF_OK)
return -ENODEV;

/* More than one range to iommufd will require proper DMABUF support */
@@ -591,6 +592,64 @@ int vfio_pci_core_mmap_prep_dmabuf(struct vfio_pci_core_device *vdev,
return ret;
}

+/* Set the DMABUF's revocation status (OK or temporarily/permanently revoked) */
+static void vfio_pci_dma_buf_set_status(struct vfio_pci_dma_buf *priv,
+ enum vfio_pci_dma_buf_status new_status)
+{
+ bool was_revoked;
+
+ lockdep_assert_held_write(&priv->vdev->memory_lock);
+
+ if (priv->status == VFIO_PCI_DMABUF_PERM_REVOKED ||
+ priv->status == new_status) {
+ return;
+ }
+
+ dma_resv_lock(priv->dmabuf->resv, NULL);
+ was_revoked = (priv->status == VFIO_PCI_DMABUF_TEMP_REVOKED);
+
+ if (new_status != VFIO_PCI_DMABUF_OK) {
+ priv->status = new_status; /* Temp or permanently revoked */
+
+ if (was_revoked) {
+ /*
+ * TEMP_REVOKED is being upgraded to
+ * PERM_REVOKED. The buffer is already gone,
+ * don't wait on it again.
+ */
+ dma_resv_unlock(priv->dmabuf->resv);
+ return;
+ }
+ }
+
+ dma_buf_invalidate_mappings(priv->dmabuf);
+ dma_resv_wait_timeout(priv->dmabuf->resv,
+ DMA_RESV_USAGE_BOOKKEEP, false,
+ MAX_SCHEDULE_TIMEOUT);
+ dma_resv_unlock(priv->dmabuf->resv);
+ if (new_status != VFIO_PCI_DMABUF_OK) {
+ kref_put(&priv->kref, vfio_pci_dma_buf_done);
+ wait_for_completion(&priv->comp);
+ unmap_mapping_range(priv->dmabuf->file->f_mapping,
+ 0, priv->size, 1);
+ /*
+ * Re-arm the registered kref reference and the
+ * completion so the post-revoke state matches the
+ * post-creation state. An un-revoke followed by a
+ * new mapping needs the kref to be non-zero before
+ * kref_get(), and vfio_pci_dma_buf_cleanup()
+ * delegates its drain back through this revoke
+ * path on a possibly-already-revoked dma-buf.
+ */
+ kref_init(&priv->kref);
+ reinit_completion(&priv->comp);
+ } else {
+ dma_resv_lock(priv->dmabuf->resv, NULL);
+ priv->status = VFIO_PCI_DMABUF_OK;
+ dma_resv_unlock(priv->dmabuf->resv);
+ }
+}
+
void vfio_pci_dma_buf_move(struct vfio_pci_core_device *vdev, bool revoked)
{
struct vfio_pci_dma_buf *priv;
@@ -599,44 +658,15 @@ void vfio_pci_dma_buf_move(struct vfio_pci_core_device *vdev, bool revoked)
lockdep_assert_held_write(&vdev->memory_lock);
/*
* Holding memory_lock ensures a racing VMA fault observes
- * priv->revoked properly.
+ * priv->status properly.
*/

list_for_each_entry_safe(priv, tmp, &vdev->dmabufs, dmabufs_elm) {
if (!get_file_active(&priv->dmabuf->file))
continue;
-
- if (priv->revoked != revoked) {
- dma_resv_lock(priv->dmabuf->resv, NULL);
- if (revoked)
- priv->revoked = true;
- dma_buf_invalidate_mappings(priv->dmabuf);
- dma_resv_wait_timeout(priv->dmabuf->resv,
- DMA_RESV_USAGE_BOOKKEEP, false,
- MAX_SCHEDULE_TIMEOUT);
- dma_resv_unlock(priv->dmabuf->resv);
- if (revoked) {
- kref_put(&priv->kref, vfio_pci_dma_buf_done);
- wait_for_completion(&priv->comp);
- unmap_mapping_range(priv->dmabuf->file->f_mapping,
- 0, priv->size, 1);
- /*
- * Re-arm the registered kref reference and the
- * completion so the post-revoke state matches the
- * post-creation state. An un-revoke followed by a
- * new mapping needs the kref to be non-zero before
- * kref_get(), and vfio_pci_dma_buf_cleanup()
- * delegates its drain back through this revoke
- * path on a possibly-already-revoked dma-buf.
- */
- kref_init(&priv->kref);
- reinit_completion(&priv->comp);
- } else {
- dma_resv_lock(priv->dmabuf->resv, NULL);
- priv->revoked = false;
- dma_resv_unlock(priv->dmabuf->resv);
- }
- }
+ vfio_pci_dma_buf_set_status(priv, revoked ?
+ VFIO_PCI_DMABUF_TEMP_REVOKED :
+ VFIO_PCI_DMABUF_OK);
fput(priv->dmabuf->file);
}
}
@@ -668,3 +698,66 @@ void vfio_pci_dma_buf_cleanup(struct vfio_pci_core_device *vdev)
}
up_write(&vdev->memory_lock);
}
+
+#ifdef CONFIG_VFIO_PCI_DMABUF
+int vfio_pci_core_feature_dma_buf_revoke(
+ struct vfio_pci_core_device *vdev, u32 flags,
+ struct vfio_device_feature_dma_buf_revoke __user *arg,
+ size_t argsz)
+{
+ struct vfio_device_feature_dma_buf_revoke db_revoke;
+ struct vfio_pci_dma_buf *priv;
+ struct dma_buf *dmabuf;
+ int ret;
+
+ if (!vdev->pci_ops || !vdev->pci_ops->get_dmabuf_phys)
+ return -EOPNOTSUPP;
+
+ ret = vfio_check_feature(flags, argsz,
+ VFIO_DEVICE_FEATURE_SET,
+ sizeof(db_revoke));
+ if (ret != 1)
+ return ret;
+
+ if (copy_from_user(&db_revoke, arg, sizeof(db_revoke)))
+ return -EFAULT;
+
+ dmabuf = dma_buf_get(db_revoke.dmabuf_fd);
+ if (IS_ERR(dmabuf))
+ return PTR_ERR(dmabuf);
+
+ priv = dmabuf->priv;
+ /*
+ * Sanity-check the DMABUF is really a vfio_pci_dma_buf _and_
+ * relates to the VFIO device it was provided with.
+ *
+ * If the DMABUF relates to this vdev then priv->vdev is
+ * stable because this open fd prevents cleanup.
+ *
+ * If it relates to a different vdev, reading priv->vdev might
+ * race with a concurrent cleanup on that device. But if so,
+ * it points to a non-matching vdev or NULL and is unusable
+ * either way.
+ */
+ if (dmabuf->ops != &vfio_pci_dmabuf_ops ||
+ READ_ONCE(priv->vdev) != vdev) {
+ ret = -ENODEV;
+ goto out_put_buf;
+ }
+
+ scoped_guard(rwsem_write, &vdev->memory_lock) {
+ if (priv->status == VFIO_PCI_DMABUF_PERM_REVOKED) {
+ ret = -EBADFD;
+ } else {
+ vfio_pci_dma_buf_set_status(priv,
+ VFIO_PCI_DMABUF_PERM_REVOKED);
+ ret = 0;
+ }
+ }
+
+out_put_buf:
+ dma_buf_put(dmabuf);
+
+ return ret;
+}
+#endif /* CONFIG_VFIO_PCI_DMABUF */
diff --git a/drivers/vfio/pci/vfio_pci_priv.h b/drivers/vfio/pci/vfio_pci_priv.h
index db2e2aeae88f..3c2f2575b670 100644
--- a/drivers/vfio/pci/vfio_pci_priv.h
+++ b/drivers/vfio/pci/vfio_pci_priv.h
@@ -23,6 +23,12 @@ struct vfio_pci_ioeventfd {
bool test_mem;
};

+enum vfio_pci_dma_buf_status {
+ VFIO_PCI_DMABUF_OK = 0,
+ VFIO_PCI_DMABUF_TEMP_REVOKED = 1,
+ VFIO_PCI_DMABUF_PERM_REVOKED = 2,
+};
+
struct vfio_pci_dma_buf {
struct dma_buf *dmabuf;
struct vfio_pci_core_device *vdev;
@@ -35,7 +41,7 @@ struct vfio_pci_dma_buf {
struct kref kref;
struct completion comp;
unsigned long vma_pgoff_adjust;
- u8 revoked : 1;
+ enum vfio_pci_dma_buf_status status;
};

extern const struct vm_operations_struct vfio_pci_mmap_ops;
@@ -148,6 +154,10 @@ void vfio_pci_dma_buf_move(struct vfio_pci_core_device *vdev, bool revoked);
int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
struct vfio_device_feature_dma_buf __user *arg,
size_t argsz);
+int vfio_pci_core_feature_dma_buf_revoke(
+ struct vfio_pci_core_device *vdev, u32 flags,
+ struct vfio_device_feature_dma_buf_revoke __user *arg,
+ size_t argsz);
#else
static inline int
vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
@@ -156,6 +166,13 @@ vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
{
return -ENOTTY;
}
+static inline int vfio_pci_core_feature_dma_buf_revoke(
+ struct vfio_pci_core_device *vdev, u32 flags,
+ struct vfio_device_feature_dma_buf_revoke __user *arg,
+ size_t argsz)
+{
+ return -ENOTTY;
+}
#endif

#endif
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 5de618a3a5ee..697c0bb4b9bc 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -1534,6 +1534,26 @@ struct vfio_device_feature_dma_buf {
*/
#define VFIO_DEVICE_FEATURE_MIG_PRECOPY_INFOv2 12

+/**
+ * Given a dma_buf fd previously created by
+ * VFIO_DEVICE_FEATURE_DMA_BUF, a SET of this feature requests that
+ * access to the corresponding DMABUF is immediately and permanently
+ * revoked. On successful return, the buffer is not accessible
+ * through any mmap() or dma-buf import. The buffer is permanently
+ * disabled, and VFIO refuses all map, mmap, attach, etc. requests.
+ *
+ * Return: 0 on success, -1 and errno is set on failure:
+ *
+ * EBADF, EINVAL: dmabuf_fd is not a DMABUF fd.
+ * ENODEV: The dmabuf_fd does not match this VFIO device.
+ * EBADFD: The DMABUF is already permanently revoked.
+ */
+#define VFIO_DEVICE_FEATURE_DMA_BUF_REVOKE 13
+
+struct vfio_device_feature_dma_buf_revoke {
+ __s32 dmabuf_fd;
+};
+
/* -------- API for Type1 VFIO IOMMU -------- */

/**
--
2.50.1 (Apple Git-155)