[PATCH -next v2] md: Hold mddev->reconfig_mutex when trying to get mddev->sync_thread

From: Li Lingfeng
Date: Thu Aug 03 2023 - 02:56:32 EST


Commit ba9d9f1a707f ("Revert "md: unlock mddev before reap sync_thread in
action_store"") removed the scenario of calling md_unregister_thread()
without holding mddev->reconfig_mutex, so add a lock holding check before
acquiring mddev->sync_thread by passing mdev to md_unregister_thread().

Signed-off-by: Li Lingfeng <lilingfeng@xxxxxxxxxxxxxxx>
---
v1->v2: optimize summary

drivers/md/md-cluster.c | 8 ++++----
drivers/md/md.c | 9 +++++----
drivers/md/md.h | 2 +-
drivers/md/raid1.c | 4 ++--
drivers/md/raid10.c | 2 +-
drivers/md/raid5-cache.c | 2 +-
drivers/md/raid5.c | 2 +-
7 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 3d9fd74233df..1e26eb223349 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -952,8 +952,8 @@ static int join(struct mddev *mddev, int nodes)
return 0;
err:
set_bit(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD, &cinfo->state);
- md_unregister_thread(&cinfo->recovery_thread);
- md_unregister_thread(&cinfo->recv_thread);
+ md_unregister_thread(mddev, &cinfo->recovery_thread);
+ md_unregister_thread(mddev, &cinfo->recv_thread);
lockres_free(cinfo->message_lockres);
lockres_free(cinfo->token_lockres);
lockres_free(cinfo->ack_lockres);
@@ -1015,8 +1015,8 @@ static int leave(struct mddev *mddev)
resync_bitmap(mddev);

set_bit(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD, &cinfo->state);
- md_unregister_thread(&cinfo->recovery_thread);
- md_unregister_thread(&cinfo->recv_thread);
+ md_unregister_thread(mddev, &cinfo->recovery_thread);
+ md_unregister_thread(mddev, &cinfo->recv_thread);
lockres_free(cinfo->message_lockres);
lockres_free(cinfo->token_lockres);
lockres_free(cinfo->ack_lockres);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index a3d98273b295..5c3c19b8d509 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -6258,7 +6258,7 @@ static void mddev_detach(struct mddev *mddev)
mddev->pers->quiesce(mddev, 1);
mddev->pers->quiesce(mddev, 0);
}
- md_unregister_thread(&mddev->thread);
+ md_unregister_thread(mddev, &mddev->thread);
if (mddev->queue)
blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
}
@@ -7990,9 +7990,10 @@ struct md_thread *md_register_thread(void (*run) (struct md_thread *),
}
EXPORT_SYMBOL(md_register_thread);

-void md_unregister_thread(struct md_thread __rcu **threadp)
+void md_unregister_thread(struct mddev *mddev, struct md_thread __rcu **threadp)
{
- struct md_thread *thread = rcu_dereference_protected(*threadp, true);
+ struct md_thread *thread = rcu_dereference_protected(*threadp,
+ lockdep_is_held(&mddev->reconfig_mutex));

if (!thread)
return;
@@ -9484,7 +9485,7 @@ void md_reap_sync_thread(struct mddev *mddev)
bool is_reshaped = false;

/* resync has finished, collect result */
- md_unregister_thread(&mddev->sync_thread);
+ md_unregister_thread(mddev, &mddev->sync_thread);
atomic_inc(&mddev->sync_seq);

if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 8ae957480976..9bcb77bca963 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -761,7 +761,7 @@ extern struct md_thread *md_register_thread(
void (*run)(struct md_thread *thread),
struct mddev *mddev,
const char *name);
-extern void md_unregister_thread(struct md_thread __rcu **threadp);
+extern void md_unregister_thread(struct mddev *mddev, struct md_thread __rcu **threadp);
extern void md_wakeup_thread(struct md_thread __rcu *thread);
extern void md_check_recovery(struct mddev *mddev);
extern void md_reap_sync_thread(struct mddev *mddev);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 23d211969565..581dfbdfca89 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -3152,7 +3152,7 @@ static int raid1_run(struct mddev *mddev)
* RAID1 needs at least one disk in active
*/
if (conf->raid_disks - mddev->degraded < 1) {
- md_unregister_thread(&conf->thread);
+ md_unregister_thread(mddev, &conf->thread);
ret = -EINVAL;
goto abort;
}
@@ -3179,7 +3179,7 @@ static int raid1_run(struct mddev *mddev)

ret = md_integrity_register(mddev);
if (ret) {
- md_unregister_thread(&mddev->thread);
+ md_unregister_thread(mddev, &mddev->thread);
goto abort;
}
return 0;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 16aa9d735880..6188b71186f4 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -4320,7 +4320,7 @@ static int raid10_run(struct mddev *mddev)
return 0;

out_free_conf:
- md_unregister_thread(&mddev->thread);
+ md_unregister_thread(mddev, &mddev->thread);
raid10_free_conf(conf);
mddev->private = NULL;
out:
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 2eac4a50d99b..a29b9650260a 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -3168,7 +3168,7 @@ void r5l_exit_log(struct r5conf *conf)
{
struct r5l_log *log = conf->log;

- md_unregister_thread(&log->reclaim_thread);
+ md_unregister_thread(conf->mddev, &log->reclaim_thread);

/*
* 'reconfig_mutex' is held by caller, set 'confg->log' to NULL to
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 32a87193bad7..4cb9c608ee19 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -8107,7 +8107,7 @@ static int raid5_run(struct mddev *mddev)

return 0;
abort:
- md_unregister_thread(&mddev->thread);
+ md_unregister_thread(mddev, &mddev->thread);
print_raid5_conf(conf);
free_conf(conf);
mddev->private = NULL;
--
2.39.2