Re: [PATCH v5 5/6] dm: add 'noexcl' option for dm-linear

From: Mike Snitzer
Date: Thu Feb 11 2021 - 13:18:07 EST


On Tue, Feb 09 2021 at 9:30am -0500,
Sergei Shtepa <sergei.shtepa@xxxxxxxxx> wrote:

> The 'noexcl' option allow to open underlying block-device
> without FMODE_EXCL.
>
> Signed-off-by: Sergei Shtepa <sergei.shtepa@xxxxxxxxx>
> ---
> drivers/md/dm-linear.c | 14 +++++++++++++-
> drivers/md/dm-table.c | 14 ++++++++------
> drivers/md/dm.c | 26 +++++++++++++++++++-------
> drivers/md/dm.h | 2 +-
> include/linux/device-mapper.h | 7 +++++++
> 5 files changed, 48 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
> index 00774b5d7668..b16d89802b9d 100644
> --- a/drivers/md/dm-linear.c
> +++ b/drivers/md/dm-linear.c
> @@ -33,7 +33,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
> char dummy;
> int ret;
>
> - if (argc != 2) {
> + if ((argc < 2) || (argc > 3)) {
> ti->error = "Invalid argument count";
> return -EINVAL;
> }
> @@ -51,6 +51,18 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
> }
> lc->start = tmp;
>
> + ti->non_exclusive = false;
> + if (argc > 2) {
> + if (strcmp("noexcl", argv[2]) == 0)
> + ti->non_exclusive = true;
> + else if (strcmp("excl", argv[2]) == 0)
> + ti->non_exclusive = false;
> + else {
> + ti->error = "Invalid exclusive option";
> + return -EINVAL;
> + }
> + }
> +
> ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &lc->dev);
> if (ret) {
> ti->error = "Device lookup failed";
> diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
> index 4acf2342f7ad..f020459465bd 100644
> --- a/drivers/md/dm-table.c
> +++ b/drivers/md/dm-table.c
> @@ -322,7 +322,7 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
> * device and not to touch the existing bdev field in case
> * it is accessed concurrently.
> */
> -static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
> +static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode, bool non_exclusive,
> struct mapped_device *md)
> {
> int r;
> @@ -330,8 +330,8 @@ static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
>
> old_dev = dd->dm_dev;
>
> - r = dm_get_table_device(md, dd->dm_dev->bdev->bd_dev,
> - dd->dm_dev->mode | new_mode, &new_dev);
> + r = dm_get_table_device(md, dd->dm_dev->bdev->bd_dev, dd->dm_dev->mode | new_mode,
> + non_exclusive, &new_dev);
> if (r)
> return r;
>
> @@ -387,7 +387,8 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
> if (!dd)
> return -ENOMEM;
>
> - if ((r = dm_get_table_device(t->md, dev, mode, &dd->dm_dev))) {
> + r = dm_get_table_device(t->md, dev, mode, ti->non_exclusive, &dd->dm_dev);
> + if (r) {
> kfree(dd);
> return r;
> }
> @@ -396,8 +397,9 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
> list_add(&dd->list, &t->devices);
> goto out;
>
> - } else if (dd->dm_dev->mode != (mode | dd->dm_dev->mode)) {
> - r = upgrade_mode(dd, mode, t->md);
> + } else if ((dd->dm_dev->mode != (mode | dd->dm_dev->mode)) &&
> + (dd->dm_dev->non_exclusive != ti->non_exclusive)) {
> + r = upgrade_mode(dd, mode, ti->non_exclusive, t->md);
> if (r)
> return r;
> }
> diff --git a/drivers/md/dm.c b/drivers/md/dm.c
> index 00c41aa6d092..c25dcc2fdb89 100644
> --- a/drivers/md/dm.c
> +++ b/drivers/md/dm.c
> @@ -1117,33 +1117,44 @@ static void close_table_device(struct table_device *td, struct mapped_device *md
> if (!td->dm_dev.bdev)
> return;
>
> - bd_unlink_disk_holder(td->dm_dev.bdev, dm_disk(md));
> - blkdev_put(td->dm_dev.bdev, td->dm_dev.mode | FMODE_EXCL);
> + if (td->dm_dev.non_exclusive)
> + blkdev_put(td->dm_dev.bdev, td->dm_dev.mode);
> + else {
> + bd_unlink_disk_holder(td->dm_dev.bdev, dm_disk(md));
> + blkdev_put(td->dm_dev.bdev, td->dm_dev.mode | FMODE_EXCL);
> + }
> +
> +
> + blkdev_put(td->dm_dev.bdev, td->dm_dev.mode);
> +
> put_dax(td->dm_dev.dax_dev);
> td->dm_dev.bdev = NULL;
> td->dm_dev.dax_dev = NULL;
> + td->dm_dev.non_exclusive = false;
> }
>
> static struct table_device *find_table_device(struct list_head *l, dev_t dev,
> - fmode_t mode)
> + fmode_t mode, bool non_exclusive)
> {
> struct table_device *td;
>
> list_for_each_entry(td, l, list)
> - if (td->dm_dev.bdev->bd_dev == dev && td->dm_dev.mode == mode)
> + if (td->dm_dev.bdev->bd_dev == dev &&
> + td->dm_dev.mode == mode &&
> + td->dm_dev.non_exclusive == non_exclusive)
> return td;
>
> return NULL;
> }
>
> -int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode,
> +int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode, bool non_exclusive,
> struct dm_dev **result)
> {
> int r;
> struct table_device *td;
>
> mutex_lock(&md->table_devices_lock);
> - td = find_table_device(&md->table_devices, dev, mode);
> + td = find_table_device(&md->table_devices, dev, mode, non_exclusive);
> if (!td) {
> td = kmalloc_node(sizeof(*td), GFP_KERNEL, md->numa_node_id);
> if (!td) {
> @@ -1154,7 +1165,8 @@ int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode,
> td->dm_dev.mode = mode;
> td->dm_dev.bdev = NULL;
>
> - if ((r = open_table_device(td, dev, md))) {
> + r = open_table_device(td, dev, md, non_exclusive);
> + if (r) {
> mutex_unlock(&md->table_devices_lock);
> kfree(td);
> return r;
> diff --git a/drivers/md/dm.h b/drivers/md/dm.h
> index fffe1e289c53..7bf20fb2de74 100644
> --- a/drivers/md/dm.h
> +++ b/drivers/md/dm.h
> @@ -179,7 +179,7 @@ int dm_open_count(struct mapped_device *md);
> int dm_lock_for_deletion(struct mapped_device *md, bool mark_deferred, bool only_deferred);
> int dm_cancel_deferred_remove(struct mapped_device *md);
> int dm_request_based(struct mapped_device *md);
> -int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode,
> +int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode, bool non_exclusive,
> struct dm_dev **result);
> void dm_put_table_device(struct mapped_device *md, struct dm_dev *d);
>
> diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
> index 61a66fb8ebb3..70002363bfc0 100644
> --- a/include/linux/device-mapper.h
> +++ b/include/linux/device-mapper.h
> @@ -150,6 +150,7 @@ struct dm_dev {
> struct block_device *bdev;
> struct dax_device *dax_dev;
> fmode_t mode;
> + bool non_exclusive;
> char name[16];
> };
>
> @@ -325,6 +326,12 @@ struct dm_target {
> * whether or not its underlying devices have support.
> */
> bool discards_supported:1;
> +
> + /*
> + * Set if this target needs to open device without FMODE_EXCL
> + * mode.
> + */
> + bool non_exclusive:1;
> };
>
> void *dm_per_bio_data(struct bio *bio, size_t data_size);
> --
> 2.20.1
>

I'm really not liking this tug-of-war about FMODE_EXCL vs not.
Especially dislike the prospect of needing to change _every_ DM target
that would be made to support blk_interposer.

I've said this before, private or otherwise, but: Hannes' approach that
fell back to opening without FMODE_EXCL if FMODE_EXCL open failed. Have
you explored that kind of approach?

You _should_ be able to infer that interposer is being used given the
requirement to use an explicit remap ioctl to establish the use of
interposer.

Mike