[PATCH v3 06/12] locking/ww_mutex: Add waiters in stamp order

From: Nicolai Hähnle
Date: Wed Dec 21 2016 - 13:48:30 EST


From: Nicolai Hähnle <Nicolai.Haehnle@xxxxxxx>

Add regular waiters in stamp order. Waiters that have no context are still
added in FIFO order, and we take care not to starve them.

While adding our task as a waiter, back off if we detect a waiter with a
lower stamp in front of us.
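
As a rough illustration, the insertion policy behaves like the following
stand-alone user-space model (toy names, an array instead of a list_head;
the real logic is __ww_mutex_add_waiter in the patch below):

#include <stdio.h>
#include <stddef.h>

/* Toy model of the wait list: each waiter carries a stamp, and a
 * negative stamp stands in for a context-less (plain FIFO) waiter. */
struct toy_waiter {
	long stamp;
};

/*
 * Return the index before which a waiter with stamp 's' is inserted,
 * scanning from the tail as __ww_mutex_add_waiter does.  Returns -1
 * where the kernel backs off with -EDEADLK: a lower (older) stamp is
 * in front of us and we already hold other locks (acquired > 0).
 */
static long insert_pos(const struct toy_waiter *w, size_t n, long s,
		       int acquired)
{
	size_t pos = n;			/* default: tail, i.e. plain FIFO */
	size_t i = n;

	while (i-- > 0) {
		if (w[i].stamp < 0)
			continue;	/* never reorder FIFO waiters */
		if (s > w[i].stamp) {	/* an older stamp is ahead of us */
			if (acquired > 0)
				return -1;	/* back off (-EDEADLK) */
			break;		/* insert behind it, at 'pos' */
		}
		pos = i;		/* keep stepping towards the head */
	}
	return (long)pos;
}

int main(void)
{
	struct toy_waiter w[] = { { 5 }, { -1 }, { 9 } };

	printf("%ld\n", insert_pos(w, 3, 7, 0));  /* 2: between FIFO and 9 */
	printf("%ld\n", insert_pos(w, 3, 3, 0));  /* 0: oldest, goes to head */
	printf("%ld\n", insert_pos(w, 3, 7, 1));  /* -1: must back off */
	return 0;
}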

Make sure to call lock_contended even when we back off early.

For w/w mutexes, being first in the wait list is only stable when taking the
lock without a context. Therefore, the purpose of the 'first' flag is split
into two: 'first' continues to indicate whether we want to spin
optimistically, while 'handoff' indicates that we should be prepared to
accept a handoff.
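
With a context, the wait loop therefore re-derives both flags on every pass;
condensed from the hunk below:

	first = __mutex_waiter_is_first(lock, &waiter);	/* re-check every pass */
	if (first)
		__mutex_set_flag(lock, MUTEX_FLAG_HANDOFF);	/* request a handoff */
	handoff = true;	/* accept one even if we lost the head spot meanwhile */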

For w/w locking with a context, we always accept handoffs after the first
schedule(), to handle the following sequence of events:

1. Task #0 unlocks and hands off to Task #2, which is first in line
2. Task #1 adds itself in front of Task #2
3. Task #2 wakes up and must accept the handoff even though it is no longer
first in line
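
As a timeline:

	T0: mutex_unlock()
	      -> T2 is first in line, so the lock is handed off to T2
	T1: __ww_mutex_add_waiter()
	      -> lower stamp, inserts itself in front of T2
	T2: wakes up and calls __mutex_trylock(lock, handoff)
	      -> no longer first in line, but 'handoff' is true after the
	         first schedule(), so the handed-off lock is taken correctly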

v2:
- rein in the indentation of __ww_mutex_add_waiter a bit
- set contending_lock in __ww_mutex_add_waiter (Chris Wilson)

v3:
- split 'first' into 'first' and 'handoff' to avoid moving the trylock calls
around so much
- scan the wait_list in reverse order in __ww_mutex_add_waiter

Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Maarten Lankhorst <dev@xxxxxxxxxxxxxx>
Cc: Daniel Vetter <daniel@xxxxxxxx>
Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: dri-devel@xxxxxxxxxxxxxxxxxxxxx
Signed-off-by: Nicolai Hähnle <nicolai.haehnle@xxxxxxx>
---
include/linux/mutex.h | 3 ++
kernel/locking/mutex.c | 97 +++++++++++++++++++++++++++++++++++++++++++++-----
2 files changed, 91 insertions(+), 9 deletions(-)

diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index b97870f..118a3b6 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -20,6 +20,8 @@
#include <linux/osq_lock.h>
#include <linux/debug_locks.h>

+struct ww_acquire_ctx;
+
/*
* Simple, straightforward mutexes with strict semantics:
*
@@ -75,6 +77,7 @@ static inline struct task_struct *__mutex_owner(struct mutex *lock)
struct mutex_waiter {
struct list_head list;
struct task_struct *task;
+ struct ww_acquire_ctx *ww_ctx;
#ifdef CONFIG_DEBUG_MUTEXES
void *magic;
#endif
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 282c6de..5b1ca20 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -620,6 +620,52 @@ __ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)
return 0;
}

+static inline int __sched
+__ww_mutex_add_waiter(struct mutex_waiter *waiter,
+ struct mutex *lock,
+ struct ww_acquire_ctx *ww_ctx)
+{
+ struct mutex_waiter *cur;
+ struct list_head *pos;
+
+ if (!ww_ctx) {
+ list_add_tail(&waiter->list, &lock->wait_list);
+ return 0;
+ }
+
+ /*
+ * Add the waiter before the first waiter with a higher stamp.
+ * Waiters without a context are skipped to avoid starving
+ * them.
+ */
+ pos = &lock->wait_list;
+ list_for_each_entry_reverse(cur, &lock->wait_list, list) {
+ if (!cur->ww_ctx)
+ continue;
+
+ if (__ww_ctx_stamp_after(ww_ctx, cur->ww_ctx)) {
+ /* Back off immediately if necessary. */
+ if (ww_ctx->acquired > 0) {
+#ifdef CONFIG_DEBUG_MUTEXES
+ struct ww_mutex *ww;
+
+ ww = container_of(lock, struct ww_mutex, base);
+ DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock);
+ ww_ctx->contending_lock = ww;
+#endif
+ return -EDEADLK;
+ }
+
+ break;
+ }
+
+ pos = &cur->list;
+ }
+
+ list_add_tail(&waiter->list, pos);
+ return 0;
+}
+
/*
* Lock a mutex (possibly interruptible), slowpath:
*/
@@ -632,6 +678,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
struct mutex_waiter waiter;
unsigned long flags;
bool first = false;
+ bool handoff = false;
struct ww_mutex *ww;
int ret;

@@ -665,15 +712,25 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
debug_mutex_lock_common(lock, &waiter);
debug_mutex_add_waiter(lock, &waiter, task);

- /* add waiting tasks to the end of the waitqueue (FIFO): */
- list_add_tail(&waiter.list, &lock->wait_list);
+ lock_contended(&lock->dep_map, ip);
+
+ if (!use_ww_ctx) {
+ /* add waiting tasks to the end of the waitqueue (FIFO): */
+ list_add_tail(&waiter.list, &lock->wait_list);
+ } else {
+ /* Add in stamp order, waking up waiters that must back off. */
+ ret = __ww_mutex_add_waiter(&waiter, lock, ww_ctx);
+ if (ret)
+ goto err_early_backoff;
+
+ waiter.ww_ctx = ww_ctx;
+ }
+
waiter.task = task;

if (__mutex_waiter_is_first(lock, &waiter))
__mutex_set_flag(lock, MUTEX_FLAG_WAITERS);

- lock_contended(&lock->dep_map, ip);
-
set_task_state(task, state);
for (;;) {
/*
@@ -682,7 +739,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
* before testing the error conditions to make sure we pick up
* the handoff.
*/
- if (__mutex_trylock(lock, first))
+ if (__mutex_trylock(lock, handoff))
goto acquired;

/*
@@ -711,13 +768,34 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
* or we must see its unlock and acquire.
*/

- if (!first && __mutex_waiter_is_first(lock, &waiter)) {
- first = true;
+ if (use_ww_ctx && ww_ctx) {
+ /*
+ * Always re-check whether we're in first position. We
+ * don't want to spin if another task with a lower
+ * stamp has taken our position.
+ *
+ * We also may have to set the handoff flag again, if
+ * our position at the head was temporarily taken away.
+ */
+ first = __mutex_waiter_is_first(lock, &waiter);
+
+ if (first)
+ __mutex_set_flag(lock, MUTEX_FLAG_HANDOFF);
+
+ /*
+ * Always be prepared to accept a handoff after the
+ * first wait, because we may have been the first
+ * waiter during unlock.
+ */
+ handoff = true;
+ } else if (!first && __mutex_waiter_is_first(lock, &waiter)) {
+ first = handoff = true;
__mutex_set_flag(lock, MUTEX_FLAG_HANDOFF);
}

- if ((first && mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, true)) ||
- __mutex_trylock(lock, first))
+ if ((first &&
+ mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, true)) ||
+ __mutex_trylock(lock, handoff))
break;

spin_lock_mutex(&lock->wait_lock, flags);
@@ -746,6 +824,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
err:
__set_task_state(task, TASK_RUNNING);
mutex_remove_waiter(lock, &waiter, task);
+err_early_backoff:
spin_unlock_mutex(&lock->wait_lock, flags);
debug_mutex_free_waiter(&waiter);
mutex_release(&lock->dep_map, 1, ip);
--
2.7.4