[tip: locking/urgent] locking/rtmutex: Make sure we wake anything on the wake_q when we release the lock->wait_lock

From: tip-bot2 for John Stultz
Date: Tue Dec 24 2024 - 04:48:46 EST


The following commit has been merged into the locking/urgent branch of tip:

Commit-ID: 4a077914578183ec397ad09f7156a357e00e5d72
Gitweb: https://git.kernel.org/tip/4a077914578183ec397ad09f7156a357e00e5d72
Author: John Stultz <jstultz@xxxxxxxxxx>
AuthorDate: Thu, 12 Dec 2024 14:21:33 -08:00
Committer: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
CommitterDate: Tue, 17 Dec 2024 17:47:24 +01:00

locking/rtmutex: Make sure we wake anything on the wake_q when we release the lock->wait_lock

Bert reported seeing occasional boot hangs when running with
PREEPT_RT and bisected it down to commit 894d1b3db41c
("locking/mutex: Remove wakeups from under mutex::wait_lock").

It looks like I missed a few spots where we drop the wait_lock and
potentially call into schedule without waking up the tasks on the
wake_q structure. Since the tasks being woken are ww_mutex tasks
they need to be able to run to release the mutex and unblock the
task that currently is planning to wake them. Thus we can deadlock.

So make sure we wake the wake_q tasks when we unlock the wait_lock.

Closes: https://lore.kernel.org/lkml/20241211182502.2915-1-spasswolf@xxxxxx
Fixes: 894d1b3db41c ("locking/mutex: Remove wakeups from under mutex::wait_lock")
Reported-by: Bert Karwatzki <spasswolf@xxxxxx>
Signed-off-by: John Stultz <jstultz@xxxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Link: https://lkml.kernel.org/r/20241212222138.2400498-1-jstultz@xxxxxxxxxx
---
kernel/locking/rtmutex.c | 18 ++++++++++++++++--
kernel/locking/rtmutex_api.c | 2 +-
2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index e858de2..697a56d 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1292,7 +1292,13 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
*/
get_task_struct(owner);

+ preempt_disable();
raw_spin_unlock_irq(&lock->wait_lock);
+ /* wake up any tasks on the wake_q before calling rt_mutex_adjust_prio_chain */
+ wake_up_q(wake_q);
+ wake_q_init(wake_q);
+ preempt_enable();
+

res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
next_lock, waiter, task);
@@ -1596,6 +1602,7 @@ static void __sched remove_waiter(struct rt_mutex_base *lock,
* or TASK_UNINTERRUPTIBLE)
* @timeout: the pre-initialized and started timer, or NULL for none
* @waiter: the pre-initialized rt_mutex_waiter
+ * @wake_q: wake_q of tasks to wake when we drop the lock->wait_lock
*
* Must be called with lock->wait_lock held and interrupts disabled
*/
@@ -1603,7 +1610,8 @@ static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock,
struct ww_acquire_ctx *ww_ctx,
unsigned int state,
struct hrtimer_sleeper *timeout,
- struct rt_mutex_waiter *waiter)
+ struct rt_mutex_waiter *waiter,
+ struct wake_q_head *wake_q)
__releases(&lock->wait_lock) __acquires(&lock->wait_lock)
{
struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
@@ -1634,7 +1642,13 @@ static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock,
owner = rt_mutex_owner(lock);
else
owner = NULL;
+ preempt_disable();
raw_spin_unlock_irq(&lock->wait_lock);
+ if (wake_q) {
+ wake_up_q(wake_q);
+ wake_q_init(wake_q);
+ }
+ preempt_enable();

if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner))
rt_mutex_schedule();
@@ -1708,7 +1722,7 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,

ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk, wake_q);
if (likely(!ret))
- ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter);
+ ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter, wake_q);

if (likely(!ret)) {
/* acquired the lock */
diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c
index 33ea31d..191e472 100644
--- a/kernel/locking/rtmutex_api.c
+++ b/kernel/locking/rtmutex_api.c
@@ -383,7 +383,7 @@ int __sched rt_mutex_wait_proxy_lock(struct rt_mutex_base *lock,
raw_spin_lock_irq(&lock->wait_lock);
/* sleep on the mutex */
set_current_state(TASK_INTERRUPTIBLE);
- ret = rt_mutex_slowlock_block(lock, NULL, TASK_INTERRUPTIBLE, to, waiter);
+ ret = rt_mutex_slowlock_block(lock, NULL, TASK_INTERRUPTIBLE, to, waiter, NULL);
/*
* try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
* have to fix that up.