[PATCH 2/2] md: create symlink with disk holder after mddev resume

From: linan666
Date: Thu Dec 21 2023 - 02:13:44 EST


From: Li Nan <linan122@xxxxxxxxxx>

There is a risk of deadlock when a process gets disk->open_mutex after
suspending mddev, because other processes may hold open_mutex while
submitting io. For example:

T1                                      T2
blkdev_open
 bdev_open_by_dev
  mutex_lock(&disk->open_mutex)
                                        md_ioctl
                                         mddev_suspend_and_lock
                                          mddev_suspend
                                         md_add_new_disk
                                          bind_rdev_to_array
                                           bd_link_disk_holder
                                            //wait open_mutex
  blkdev_get_whole
   bdev_disk_changed
    efi_partition
     read_lba
      ...
       md_submit_bio
        md_handle_request
         //wait resume

Fix it by getting disk->open_mutex after mddev resume: iterate over
mddev->disks and create the symlink for each rdev whose symlink has not
been created yet. Also move bd_unlink_disk_holder() to mddev_unlock();
the rdev has already been deleted from mddev->disks at that point, which
avoids concurrent bind and unbind.

Fixes: 1b0a2d950ee2 ("md: use new apis to suspend array for ioctls involed array reconfiguration")
Signed-off-by: Li Nan <linan122@xxxxxxxxxx>
---
drivers/md/md.c | 39 +++++++++++++++++++++++++++++----------
1 file changed, 29 insertions(+), 10 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index d6612b922c76..c128570f2a5d 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -521,6 +521,20 @@ void mddev_resume(struct mddev *mddev)
}
EXPORT_SYMBOL_GPL(mddev_resume);

+static void md_link_disk_holder(struct mddev *mddev)
+{
+ struct md_rdev *rdev;
+
+ rcu_read_lock();
+ rdev_for_each_rcu(rdev, mddev) {
+ if (test_bit(SymlinkCreated, &rdev->flags))
+ continue;
+ if (!bd_link_disk_holder(rdev->bdev, mddev->gendisk))
+ set_bit(SymlinkCreated, &rdev->flags);
+ }
+ rcu_read_unlock();
+}
+
/*
* Generic flush handling for md
*/
@@ -902,6 +916,11 @@ void mddev_unlock(struct mddev *mddev)

list_for_each_entry_safe(rdev, tmp, &delete, same_set) {
list_del_init(&rdev->same_set);
+ if (test_bit(SymlinkCreated, &rdev->flags)) {
+ bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
+ clear_bit(SymlinkCreated, &rdev->flags);
+ }
+ rdev->mddev = NULL;
kobject_del(&rdev->kobj);
export_rdev(rdev, mddev);
}
@@ -2526,8 +2545,6 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
sysfs_get_dirent_safe(rdev->kobj.sd, "bad_blocks");

list_add_rcu(&rdev->same_set, &mddev->disks);
- if (!bd_link_disk_holder(rdev->bdev, mddev->gendisk))
- set_bit(SymlinkCreated, &rdev->flags);

/* May as well allow recovery to be retried once */
mddev->recovery_disabled++;
@@ -2562,14 +2579,9 @@ static void md_kick_rdev_from_array(struct md_rdev *rdev)
{
struct mddev *mddev = rdev->mddev;

- if (test_bit(SymlinkCreated, &rdev->flags)) {
- bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
- clear_bit(SymlinkCreated, &rdev->flags);
- }
list_del_rcu(&rdev->same_set);
pr_debug("md: unbind<%pg>\n", rdev->bdev);
mddev_destroy_serial_pool(rdev->mddev, rdev);
- rdev->mddev = NULL;
sysfs_remove_link(&rdev->kobj, "block");
sysfs_put(rdev->sysfs_state);
sysfs_put(rdev->sysfs_unack_badblocks);
@@ -4667,8 +4679,10 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len)
if (err)
export_rdev(rdev, mddev);
mddev_unlock_and_resume(mddev);
- if (!err)
+ if (!err) {
+ md_link_disk_holder(mddev);
md_new_event();
+ }
return err ? err : len;
}

@@ -6606,6 +6620,7 @@ static void autorun_devices(int part)
}
autorun_array(mddev);
mddev_unlock_and_resume(mddev);
+ md_link_disk_holder(mddev);
}
/* on success, candidates will be empty, on error
* it won't...
@@ -7832,8 +7847,12 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode,
err != -EINVAL)
mddev->hold_active = 0;

- md_ioctl_need_suspend(cmd) ? mddev_unlock_and_resume(mddev) :
- mddev_unlock(mddev);
+ if (md_ioctl_need_suspend(cmd)) {
+ mddev_unlock_and_resume(mddev);
+ md_link_disk_holder(mddev);
+ } else {
+ mddev_unlock(mddev);
+ }

out:
if(did_set_md_closing)
--
2.39.2