[PATCH 1/2] locking: Use spin primitives for busy loops

From: Nicholas Piggin
Date: Sun Aug 20 2017 - 05:25:28 EST


Commit fd851a3cdc ("spin loop primitives for busy waiting") introduced
a begin/relax/end sequence for busy loops, to improve busy-wait
behaviour on some architectures.

Convert most of the generic locking primitives over to use these spin
primitives.
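
As a rough illustration of the conversion pattern used throughout the
series (a sketch only, not a hunk from the patch; "flag" is a made-up
wait condition, and the spin primitives come from linux/processor.h,
introduced by the commit above):

	/* Before: open-coded busy wait */
	while (!READ_ONCE(*flag))
		cpu_relax();

	/* After: bracket the spin section, so architectures can e.g.
	 * drop SMT thread priority for the duration of the wait.
	 */
	spin_begin();
	while (!READ_ONCE(*flag))
		spin_cpu_relax();
	spin_end();

	/* Simple wait conditions can use the one-line helper instead */
	spin_until_cond(READ_ONCE(*flag));

Loops with early exits call spin_end() on each exit path, as in the
osq_lock() and pv_wait_head_or_lock() hunks below.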

Signed-off-by: Nicholas Piggin <npiggin@xxxxxxxxx>
---
 include/linux/bit_spinlock.h        |  5 ++---
 include/linux/seqlock.h             |  9 ++++-----
 kernel/locking/mcs_spinlock.h       |  6 ++----
 kernel/locking/mutex.c              | 10 ++++++++--
 kernel/locking/osq_lock.c           | 17 +++++++++++++----
 kernel/locking/qrwlock.c            | 11 ++++++++---
 kernel/locking/qspinlock.c          | 14 ++++++++++----
 kernel/locking/qspinlock_paravirt.h | 16 ++++++++++++----
 kernel/locking/rwsem-xadd.c         |  9 +++++++--
 9 files changed, 66 insertions(+), 31 deletions(-)

diff --git a/include/linux/bit_spinlock.h b/include/linux/bit_spinlock.h
index 3b5bafce4337..4cec87d9cde8 100644
--- a/include/linux/bit_spinlock.h
+++ b/include/linux/bit_spinlock.h
@@ -3,6 +3,7 @@

#include <linux/kernel.h>
#include <linux/preempt.h>
+#include <linux/processor.h>
#include <linux/atomic.h>
#include <linux/bug.h>

@@ -25,9 +26,7 @@ static inline void bit_spin_lock(int bitnum, unsigned long *addr)
#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
while (unlikely(test_and_set_bit_lock(bitnum, addr))) {
preempt_enable();
- do {
- cpu_relax();
- } while (test_bit(bitnum, addr));
+ spin_until_cond(!test_bit(bitnum, addr));
preempt_disable();
}
#endif
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index ead97654c4e9..f4bd4a6c89d9 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -32,6 +32,7 @@
* by Keith Owens and Andrea Arcangeli
*/

+#include <linux/processor.h>
#include <linux/spinlock.h>
#include <linux/preempt.h>
#include <linux/lockdep.h>
@@ -108,12 +109,10 @@ static inline unsigned __read_seqcount_begin(const seqcount_t *s)
{
unsigned ret;

-repeat:
ret = READ_ONCE(s->sequence);
- if (unlikely(ret & 1)) {
- cpu_relax();
- goto repeat;
- }
+ if (unlikely(ret & 1))
+ spin_until_cond(!((ret = READ_ONCE(s->sequence)) & 1));
+
return ret;
}

diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h
index 6a385aabcce7..a91a0cc46a4c 100644
--- a/kernel/locking/mcs_spinlock.h
+++ b/kernel/locking/mcs_spinlock.h
@@ -27,8 +27,7 @@ struct mcs_spinlock {
*/
#define arch_mcs_spin_lock_contended(l) \
do { \
- while (!(smp_load_acquire(l))) \
- cpu_relax(); \
+ spin_until_cond(smp_load_acquire(l)); \
} while (0)
#endif

@@ -107,8 +106,7 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
if (likely(cmpxchg_release(lock, node, NULL) == node))
return;
/* Wait until the next pointer is set */
- while (!(next = READ_ONCE(node->next)))
- cpu_relax();
+ spin_until_cond((next = READ_ONCE(node->next)) != NULL);
}

/* Pass lock to next waiter. */
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 858a07590e39..0ffa1cd7f12b 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -427,6 +427,7 @@ bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner,
bool ret = true;

rcu_read_lock();
+ spin_begin();
while (__mutex_owner(lock) == owner) {
/*
* Ensure we emit the owner->on_cpu, dereference _after_
@@ -450,8 +451,9 @@ bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner,
break;
}

- cpu_relax();
+ spin_cpu_relax();
}
+ spin_end();
rcu_read_unlock();

return ret;
@@ -532,6 +534,7 @@ mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx,
goto fail;
}

+ spin_begin();
for (;;) {
struct task_struct *owner;

@@ -553,8 +556,9 @@ mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx,
* memory barriers as we'll eventually observe the right
* values at the cost of a few extra spins.
*/
- cpu_relax();
+ spin_cpu_relax();
}
+ spin_end();

if (!waiter)
osq_unlock(&lock->osq);
@@ -563,6 +567,8 @@ mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx,


fail_unlock:
+ spin_end();
+
if (!waiter)
osq_unlock(&lock->osq);

diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c
index a3167941093b..9dd58bbe60b7 100644
--- a/kernel/locking/osq_lock.c
+++ b/kernel/locking/osq_lock.c
@@ -53,6 +53,7 @@ osq_wait_next(struct optimistic_spin_queue *lock,
*/
old = prev ? prev->cpu : OSQ_UNLOCKED_VAL;

+ spin_begin();
for (;;) {
if (atomic_read(&lock->tail) == curr &&
atomic_cmpxchg_acquire(&lock->tail, curr, old) == curr) {
@@ -80,8 +81,9 @@ osq_wait_next(struct optimistic_spin_queue *lock,
break;
}

- cpu_relax();
+ spin_cpu_relax();
}
+ spin_end();

return next;
}
@@ -107,6 +109,8 @@ bool osq_lock(struct optimistic_spin_queue *lock)
if (old == OSQ_UNLOCKED_VAL)
return true;

+ spin_begin();
+
prev = decode_cpu(old);
node->prev = prev;
WRITE_ONCE(prev->next, node);
@@ -129,8 +133,9 @@ bool osq_lock(struct optimistic_spin_queue *lock)
if (need_resched() || vcpu_is_preempted(node_cpu(node->prev)))
goto unqueue;

- cpu_relax();
+ spin_cpu_relax();
}
+ spin_end();
return true;

unqueue:
@@ -152,10 +157,12 @@ bool osq_lock(struct optimistic_spin_queue *lock)
* in which case we should observe @node->locked becomming
* true.
*/
- if (smp_load_acquire(&node->locked))
+ if (smp_load_acquire(&node->locked)) {
+ spin_end();
return true;
+ }

- cpu_relax();
+ spin_cpu_relax();

/*
* Or we race against a concurrent unqueue()'s step-B, in which
@@ -164,6 +171,8 @@ bool osq_lock(struct optimistic_spin_queue *lock)
prev = READ_ONCE(node->prev);
}

+ spin_end();
+
/*
* Step - B -- stabilize @next
*
diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
index 2655f26ec882..186ff495097d 100644
--- a/kernel/locking/qrwlock.c
+++ b/kernel/locking/qrwlock.c
@@ -54,10 +54,12 @@ struct __qrwlock {
static __always_inline void
rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts)
{
+ spin_begin();
while ((cnts & _QW_WMASK) == _QW_LOCKED) {
- cpu_relax();
+ spin_cpu_relax();
cnts = atomic_read_acquire(&lock->cnts);
}
+ spin_end();
}

/**
@@ -124,6 +126,7 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
* Set the waiting flag to notify readers that a writer is pending,
* or wait for a previous writer to go away.
*/
+ spin_begin();
for (;;) {
struct __qrwlock *l = (struct __qrwlock *)lock;

@@ -131,7 +134,7 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
(cmpxchg_relaxed(&l->wmode, 0, _QW_WAITING) == 0))
break;

- cpu_relax();
+ spin_cpu_relax();
}

/* When no more readers, set the locked flag */
@@ -142,8 +145,10 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
_QW_LOCKED) == _QW_WAITING))
break;

- cpu_relax();
+ spin_cpu_relax();
}
+ spin_end();
+
unlock:
arch_spin_unlock(&lock->wait_lock);
}
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index fd24153e8a48..52ebcebf6fa8 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -362,6 +362,7 @@ void queued_spin_unlock_wait(struct qspinlock *lock)
{
u32 val;

+ spin_begin();
for (;;) {
val = atomic_read(&lock->val);

@@ -372,14 +373,15 @@ void queued_spin_unlock_wait(struct qspinlock *lock)
break;

/* not locked, but pending, wait until we observe the lock */
- cpu_relax();
+ spin_cpu_relax();
}

/* any unlock is good */
while (atomic_read(&lock->val) & _Q_LOCKED_MASK)
- cpu_relax();
+ spin_cpu_relax();

done:
+ spin_end();
smp_acquire__after_ctrl_dep();
}
EXPORT_SYMBOL(queued_spin_unlock_wait);
@@ -428,8 +430,10 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
* 0,1,0 -> 0,0,1
*/
if (val == _Q_PENDING_VAL) {
+ spin_begin();
while ((val = atomic_read(&lock->val)) == _Q_PENDING_VAL)
- cpu_relax();
+ spin_cpu_relax();
+ spin_end();
}

/*
@@ -609,8 +613,10 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
* contended path; wait for next if not observed yet, release.
*/
if (!next) {
+ spin_begin();
while (!(next = READ_ONCE(node->next)))
- cpu_relax();
+ spin_cpu_relax();
+ spin_end();
}

arch_mcs_spin_unlock_contended(&next->locked);
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 4ccfcaae5b89..88817e41fadf 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -293,15 +293,19 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
bool wait_early;

for (;;) {
+ spin_begin();
for (wait_early = false, loop = SPIN_THRESHOLD; loop; loop--) {
- if (READ_ONCE(node->locked))
+ if (READ_ONCE(node->locked)) {
+ spin_end();
return;
+ }
if (pv_wait_early(pp, loop)) {
wait_early = true;
break;
}
- cpu_relax();
+ spin_cpu_relax();
}
+ spin_end();

/*
* Order pn->state vs pn->locked thusly:
@@ -417,11 +421,15 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
* disable lock stealing before attempting to acquire the lock.
*/
set_pending(lock);
+ spin_begin();
for (loop = SPIN_THRESHOLD; loop; loop--) {
- if (trylock_clear_pending(lock))
+ if (trylock_clear_pending(lock)) {
+ spin_end();
goto gotlock;
- cpu_relax();
+ }
+ spin_cpu_relax();
}
+ spin_end();
clear_pending(lock);


diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 34e727f18e49..2d0e539f1a95 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -358,6 +358,7 @@ static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem)
goto out;

rcu_read_lock();
+ spin_begin();
while (sem->owner == owner) {
/*
* Ensure we emit the owner->on_cpu, dereference _after_
@@ -373,12 +374,14 @@ static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem)
*/
if (!owner->on_cpu || need_resched() ||
vcpu_is_preempted(task_cpu(owner))) {
+ spin_end();
rcu_read_unlock();
return false;
}

- cpu_relax();
+ spin_cpu_relax();
}
+ spin_end();
rcu_read_unlock();
out:
/*
@@ -408,6 +411,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
* 2) readers own the lock as we can't determine if they are
* actively running or not.
*/
+ spin_begin();
while (rwsem_spin_on_owner(sem)) {
/*
* Try to acquire the lock
@@ -432,8 +436,9 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
* memory barriers as we'll eventually observe the right
* values at the cost of a few extra spins.
*/
- cpu_relax();
+ spin_cpu_relax();
}
+ spin_end();
osq_unlock(&sem->osq);
done:
preempt_enable();
--
2.13.3