[PATCH 6.6 1/3] vfio: Create vfio_fs_type with inode per device

From: Axel Rasmussen
Date: Wed Jul 17 2024 - 18:25:19 EST


From: Alex Williamson <alex.williamson@xxxxxxxxxx>

commit b7c5e64fecfa88764791679cca4786ac65de739e upstream.

By linking all the device fds we provide to userspace to an
address space through a new pseudo fs, we can use tools like
unmap_mapping_range() to zap all vmas associated with a device.

Suggested-by: Jason Gunthorpe <jgg@xxxxxxxxxx>
Reviewed-by: Jason Gunthorpe <jgg@xxxxxxxxxx>
Reviewed-by: Kevin Tian <kevin.tian@xxxxxxxxx>
Link: https://lore.kernel.org/r/20240530045236.1005864-2-alex.williamson@xxxxxxxxxx
Signed-off-by: Alex Williamson <alex.williamson@xxxxxxxxxx>
Signed-off-by: Axel Rasmussen <axelrasmussen@xxxxxxxxxx>
---
drivers/vfio/device_cdev.c | 7 ++++++
drivers/vfio/group.c | 7 ++++++
drivers/vfio/vfio_main.c | 44 ++++++++++++++++++++++++++++++++++++++
include/linux/vfio.h | 1 +
4 files changed, 59 insertions(+)

diff --git a/drivers/vfio/device_cdev.c b/drivers/vfio/device_cdev.c
index e75da0a70d1f..bb1817bd4ff3 100644
--- a/drivers/vfio/device_cdev.c
+++ b/drivers/vfio/device_cdev.c
@@ -39,6 +39,13 @@ int vfio_device_fops_cdev_open(struct inode *inode, struct file *filep)

filep->private_data = df;

+ /*
+ * Use the pseudo fs inode on the device to link all mmaps
+ * to the same address space, allowing us to unmap all vmas
+ * associated to this device using unmap_mapping_range().
+ */
+ filep->f_mapping = device->inode->i_mapping;
+
return 0;

err_put_registration:
diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c
index 610a429c6191..ded364588d29 100644
--- a/drivers/vfio/group.c
+++ b/drivers/vfio/group.c
@@ -286,6 +286,13 @@ static struct file *vfio_device_open_file(struct vfio_device *device)
*/
filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE);

+ /*
+ * Use the pseudo fs inode on the device to link all mmaps
+ * to the same address space, allowing us to unmap all vmas
+ * associated to this device using unmap_mapping_range().
+ */
+ filep->f_mapping = device->inode->i_mapping;
+
if (device->group->type == VFIO_NO_IOMMU)
dev_warn(device->dev, "vfio-noiommu device opened by user "
"(%s:%d)\n", current->comm, task_pid_nr(current));
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index 40732e8ed4c6..a205d3a4e379 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -22,8 +22,10 @@
#include <linux/list.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
+#include <linux/mount.h>
#include <linux/mutex.h>
#include <linux/pci.h>
+#include <linux/pseudo_fs.h>
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/slab.h>
@@ -43,9 +45,13 @@
#define DRIVER_AUTHOR "Alex Williamson <alex.williamson@xxxxxxxxxx>"
#define DRIVER_DESC "VFIO - User Level meta-driver"

+#define VFIO_MAGIC 0x5646494f /* "VFIO" */
+
static struct vfio {
struct class *device_class;
struct ida device_ida;
+ struct vfsmount *vfs_mount;
+ int fs_count;
} vfio;

#ifdef CONFIG_VFIO_NOIOMMU
@@ -186,6 +192,8 @@ static void vfio_device_release(struct device *dev)
if (device->ops->release)
device->ops->release(device);

+ iput(device->inode);
+ simple_release_fs(&vfio.vfs_mount, &vfio.fs_count);
kvfree(device);
}

@@ -228,6 +236,34 @@ struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev,
}
EXPORT_SYMBOL_GPL(_vfio_alloc_device);

+static int vfio_fs_init_fs_context(struct fs_context *fc)
+{
+ return init_pseudo(fc, VFIO_MAGIC) ? 0 : -ENOMEM;
+}
+
+static struct file_system_type vfio_fs_type = {
+ .name = "vfio",
+ .owner = THIS_MODULE,
+ .init_fs_context = vfio_fs_init_fs_context,
+ .kill_sb = kill_anon_super,
+};
+
+static struct inode *vfio_fs_inode_new(void)
+{
+ struct inode *inode;
+ int ret;
+
+ ret = simple_pin_fs(&vfio_fs_type, &vfio.vfs_mount, &vfio.fs_count);
+ if (ret)
+ return ERR_PTR(ret);
+
+ inode = alloc_anon_inode(vfio.vfs_mount->mnt_sb);
+ if (IS_ERR(inode))
+ simple_release_fs(&vfio.vfs_mount, &vfio.fs_count);
+
+ return inode;
+}
+
/*
* Initialize a vfio_device so it can be registered to vfio core.
*/
@@ -246,6 +282,11 @@ static int vfio_init_device(struct vfio_device *device, struct device *dev,
init_completion(&device->comp);
device->dev = dev;
device->ops = ops;
+ device->inode = vfio_fs_inode_new();
+ if (IS_ERR(device->inode)) {
+ ret = PTR_ERR(device->inode);
+ goto out_inode;
+ }

if (ops->init) {
ret = ops->init(device);
@@ -260,6 +301,9 @@ static int vfio_init_device(struct vfio_device *device, struct device *dev,
return 0;

out_uninit:
+ iput(device->inode);
+ simple_release_fs(&vfio.vfs_mount, &vfio.fs_count);
+out_inode:
vfio_release_device_set(device);
ida_free(&vfio.device_ida, device->index);
return ret;
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 5ac5f182ce0b..514a7f9b3ef4 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -64,6 +64,7 @@ struct vfio_device {
struct completion comp;
struct iommufd_access *iommufd_access;
void (*put_kvm)(struct kvm *kvm);
+ struct inode *inode;
#if IS_ENABLED(CONFIG_IOMMUFD)
struct iommufd_device *iommufd_device;
u8 iommufd_attached:1;
--
2.45.2.993.g49e7a77208-goog