Re: [PATCH v2 1/4] vfio-mdev: Fix remove race
From: Kirti Wankhede
Date: Sun Dec 25 2016 - 12:10:39 EST
On 12/23/2016 1:51 AM, Alex Williamson wrote:
> Using the mtty mdev sample driver we can generate a remove race by
> starting one shell that continuously creates mtty devices and several
> other shells all attempting to remove devices, in my case four remove
> shells. The fault occurs in mdev_remove_sysfs_files() where the
> passed type arg is NULL, which suggests we've received a struct device
> in mdev_device_remove() but it's in some sort of teardown state. The
> solution here is to make use of the accidentally unused list_head on
> the mdev_device such that the mdev core keeps a list of all the mdev
> devices. This allows us to validate that we have a valid mdev before
> we start removal, remove it from the list to prevent others from
> working on it, and if the vendor driver refuses to remove, we can
> re-add it to the list.
>
Alex,
Writing 1 on 'remove' first removes itself, i.e. calls
device_remove_file_self(dev, attr). So if the file is removed then
device_remove_file_self() should return false, isn't that returns false?
kernfs_remove_self() hold the mutex that should handle this condition.
Thanks,
Kirti.
> Cc: Kirti Wankhede <kwankhede@xxxxxxxxxx>
> Signed-off-by: Alex Williamson <alex.williamson@xxxxxxxxxx>
> ---
> drivers/vfio/mdev/mdev_core.c | 36 ++++++++++++++++++++++++++++++++++--
> 1 file changed, 34 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
> index be1ee89..6bb4d4c 100644
> --- a/drivers/vfio/mdev/mdev_core.c
> +++ b/drivers/vfio/mdev/mdev_core.c
> @@ -27,6 +27,9 @@
> static DEFINE_MUTEX(parent_list_lock);
> static struct class_compat *mdev_bus_compat_class;
>
> +static LIST_HEAD(mdev_list);
> +static DEFINE_MUTEX(mdev_list_lock);
> +
> static int _find_mdev_device(struct device *dev, void *data)
> {
> struct mdev_device *mdev;
> @@ -316,6 +319,11 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
> dev_dbg(&mdev->dev, "MDEV: created\n");
>
> mutex_unlock(&parent->lock);
> +
> + mutex_lock(&mdev_list_lock);
> + list_add(&mdev->next, &mdev_list);
> + mutex_unlock(&mdev_list_lock);
> +
> return ret;
>
> create_failed:
> @@ -329,12 +337,30 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
>
> int mdev_device_remove(struct device *dev, bool force_remove)
> {
> - struct mdev_device *mdev;
> + struct mdev_device *mdev, *tmp;
> struct parent_device *parent;
> struct mdev_type *type;
> int ret;
> + bool found = false;
>
> mdev = to_mdev_device(dev);
> +
> + mutex_lock(&mdev_list_lock);
> + list_for_each_entry(tmp, &mdev_list, next) {
> + if (tmp == mdev) {
> + found = true;
> + break;
> + }
> + }
> +
> + if (found)
> + list_del(&mdev->next);
> +
> + mutex_unlock(&mdev_list_lock);
> +
> + if (!found)
> + return -ENODEV;
> +
> type = to_mdev_type(mdev->type_kobj);
> parent = mdev->parent;
> mutex_lock(&parent->lock);
> @@ -342,6 +368,11 @@ int mdev_device_remove(struct device *dev, bool force_remove)
> ret = mdev_device_remove_ops(mdev, force_remove);
> if (ret) {
> mutex_unlock(&parent->lock);
> +
> + mutex_lock(&mdev_list_lock);
> + list_add(&mdev->next, &mdev_list);
> + mutex_unlock(&mdev_list_lock);
> +
> return ret;
> }
>
> @@ -349,7 +380,8 @@ int mdev_device_remove(struct device *dev, bool force_remove)
> device_unregister(dev);
> mutex_unlock(&parent->lock);
> mdev_put_parent(parent);
> - return ret;
> +
> + return 0;
> }
>
> static int __init mdev_init(void)
>