[RFC PATCH 01/10] vfio: Create vfio_fs_type with inode per device
From: Alex Williamson
Date: Mon Feb 22 2021 - 11:52:56 EST
By linking all the device fds we provide to userspace to an
address space through a new pseudo fs, we can use tools like
unmap_mapping_range() to zap all vmas associated with a device.
Suggested-by: Jason Gunthorpe <jgg@xxxxxxxxxx>
Signed-off-by: Alex Williamson <alex.williamson@xxxxxxxxxx>
---
drivers/vfio/vfio.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 54 insertions(+)
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 38779e6fd80c..464caef97aff 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -32,11 +32,18 @@
#include <linux/vfio.h>
#include <linux/wait.h>
#include <linux/sched/signal.h>
+#include <linux/pseudo_fs.h>
+#include <linux/mount.h>
#define DRIVER_VERSION "0.3"
#define DRIVER_AUTHOR "Alex Williamson <alex.williamson@xxxxxxxxxx>"
#define DRIVER_DESC "VFIO - User Level meta-driver"
+#define VFIO_MAGIC 0x5646494f /* "VFIO" */
+
+static int vfio_fs_cnt;
+static struct vfsmount *vfio_fs_mnt;
+
static struct vfio {
struct class *class;
struct list_head iommu_drivers_list;
@@ -97,6 +104,7 @@ struct vfio_device {
struct vfio_group *group;
struct list_head group_next;
void *device_data;
+ struct inode *inode;
};
#ifdef CONFIG_VFIO_NOIOMMU
@@ -529,6 +537,34 @@ static struct vfio_group *vfio_group_get_from_dev(struct device *dev)
return group;
}
+static int vfio_fs_init_fs_context(struct fs_context *fc)
+{
+ return init_pseudo(fc, VFIO_MAGIC) ? 0 : -ENOMEM;
+}
+
+static struct file_system_type vfio_fs_type = {
+ .name = "vfio",
+ .owner = THIS_MODULE,
+ .init_fs_context = vfio_fs_init_fs_context,
+ .kill_sb = kill_anon_super,
+};
+
+static struct inode *vfio_fs_inode_new(void)
+{
+ struct inode *inode;
+ int ret;
+
+ ret = simple_pin_fs(&vfio_fs_type, &vfio_fs_mnt, &vfio_fs_cnt);
+ if (ret)
+ return ERR_PTR(ret);
+
+ inode = alloc_anon_inode(vfio_fs_mnt->mnt_sb);
+ if (IS_ERR(inode))
+ simple_release_fs(&vfio_fs_mnt, &vfio_fs_cnt);
+
+ return inode;
+}
+
/**
* Device objects - create, release, get, put, search
*/
@@ -539,11 +575,19 @@ struct vfio_device *vfio_group_create_device(struct vfio_group *group,
void *device_data)
{
struct vfio_device *device;
+ struct inode *inode;
device = kzalloc(sizeof(*device), GFP_KERNEL);
if (!device)
return ERR_PTR(-ENOMEM);
+ inode = vfio_fs_inode_new();
+ if (IS_ERR(inode)) {
+ kfree(device);
+ return (struct vfio_device *)inode;
+ }
+ device->inode = inode;
+
kref_init(&device->kref);
device->dev = dev;
device->group = group;
@@ -574,6 +618,9 @@ static void vfio_device_release(struct kref *kref)
dev_set_drvdata(device->dev, NULL);
+ iput(device->inode);
+ simple_release_fs(&vfio_fs_mnt, &vfio_fs_cnt);
+
kfree(device);
/* vfio_del_group_dev may be waiting for this device */
@@ -1488,6 +1535,13 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
*/
filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
+ /*
+ * Use the pseudo fs inode on the device to link all mmaps
+ * to the same address space, allowing us to unmap all vmas
+ * associated to this device using unmap_mapping_range().
+ */
+ filep->f_mapping = device->inode->i_mapping;
+
atomic_inc(&group->container_users);
fd_install(ret, filep);