Re: [PATCH v5 02/19] locking/mutex: Removes wakeups from under mutex::wait_lock

From: Waiman Long
Date: Tue Aug 22 2023 - 15:12:32 EST


On 8/19/23 02:08, John Stultz wrote:
In preparation to nest mutex::wait_lock under rq::lock we need to remove
wakeups from under it.

Cc: Joel Fernandes <joelaf@xxxxxxxxxx>
Cc: Qais Yousef <qyousef@xxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Juri Lelli <juri.lelli@xxxxxxxxxx>
Cc: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
Cc: Dietmar Eggemann <dietmar.eggemann@xxxxxxx>
Cc: Valentin Schneider <vschneid@xxxxxxxxxx>
Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
Cc: Ben Segall <bsegall@xxxxxxxxxx>
Cc: Zimuzo Ezeozue <zezeozue@xxxxxxxxxx>
Cc: Youssef Esmat <youssefesmat@xxxxxxxxxx>
Cc: Mel Gorman <mgorman@xxxxxxx>
Cc: Daniel Bristot de Oliveira <bristot@xxxxxxxxxx>
Cc: Will Deacon <will@xxxxxxxxxx>
Cc: Waiman Long <longman@xxxxxxxxxx>
Cc: Boqun Feng <boqun.feng@xxxxxxxxx>
Cc: "Paul E . McKenney" <paulmck@xxxxxxxxxx>
Cc: kernel-team@xxxxxxxxxxx
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
[Heavily changed after 55f036ca7e74 ("locking: WW mutex cleanup") and
08295b3b5bee ("locking: Implement an algorithm choice for Wound-Wait
mutexes")]
Signed-off-by: Juri Lelli <juri.lelli@xxxxxxxxxx>
[jstultz: rebased to mainline, added extra wake_up_q & init
to avoid hangs, similar to Connor's rework of this patch]
Signed-off-by: John Stultz <jstultz@xxxxxxxxxx>
---
v5:
* Reverted back to an earlier version of this patch to undo
the change that kept the wake_q in the ctx structure, as
that broke the rule that the wake_q must always be on the
stack, as its not safe for concurrency.
---
kernel/locking/mutex.c | 15 ++++++++++++---
kernel/locking/ww_mutex.h | 29 ++++++++++++++++++-----------
2 files changed, 30 insertions(+), 14 deletions(-)

diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index d973fe6041bf..118b6412845c 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -570,6 +570,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
struct lockdep_map *nest_lock, unsigned long ip,
struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx)
{
+ DEFINE_WAKE_Q(wake_q);
struct mutex_waiter waiter;
struct ww_mutex *ww;
int ret;
@@ -620,7 +621,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
*/
if (__mutex_trylock(lock)) {
if (ww_ctx)
- __ww_mutex_check_waiters(lock, ww_ctx);
+ __ww_mutex_check_waiters(lock, ww_ctx, &wake_q);
goto skip_wait;
}
@@ -640,7 +641,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
* Add in stamp order, waking up waiters that must kill
* themselves.
*/
- ret = __ww_mutex_add_waiter(&waiter, lock, ww_ctx);
+ ret = __ww_mutex_add_waiter(&waiter, lock, ww_ctx, &wake_q);
if (ret)
goto err_early_kill;
}
@@ -676,6 +677,10 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
}
raw_spin_unlock(&lock->wait_lock);
+ /* Make sure we do wakeups before calling schedule */
+ wake_up_q(&wake_q);
+ wake_q_init(&wake_q);
+

The wake_q may have task to wake up only in the case of ww_mutex which is a minority in the kernel. IOW, wake_up_q() which is a function call will do nothing in most cases. From an optimization point of view, it is better to do a "!wake_q_empty(&wake_q)" check before calling wake_up_q().


schedule_preempt_disabled();
first = __mutex_waiter_is_first(lock, &waiter);
@@ -709,7 +714,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
*/
if (!ww_ctx->is_wait_die &&
!__mutex_waiter_is_first(lock, &waiter))
- __ww_mutex_check_waiters(lock, ww_ctx);
+ __ww_mutex_check_waiters(lock, ww_ctx, &wake_q);
}
__mutex_remove_waiter(lock, &waiter);
@@ -725,6 +730,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
ww_mutex_lock_acquired(ww, ww_ctx);
raw_spin_unlock(&lock->wait_lock);
+ wake_up_q(&wake_q);
preempt_enable();
return 0;
@@ -736,6 +742,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
raw_spin_unlock(&lock->wait_lock);
debug_mutex_free_waiter(&waiter);
mutex_release(&lock->dep_map, ip);
+ wake_up_q(&wake_q);
preempt_enable();
return ret;
}
@@ -946,9 +953,11 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
if (owner & MUTEX_FLAG_HANDOFF)
__mutex_handoff(lock, next);
+ preempt_disable();
raw_spin_unlock(&lock->wait_lock);
wake_up_q(&wake_q);
+ preempt_enable();
}

I think it looks better to put the preempt_disable() right before raw_spin_lock() for proper nesting.

Cheers,
Longman