[RFC][PATCH 3/7] qspinlock: Add pending bit

From: Peter Zijlstra
Date: Mon Mar 10 2014 - 12:02:48 EST


Because the qspinlock needs to touch a second cacheline; add a pending
bit and allow a single in-word spinner before we punt to the second
cacheline.

Signed-off-by: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
---
include/asm-generic/qspinlock_types.h | 12 +++-
kernel/locking/qspinlock.c | 92 ++++++++++++++++++++++++++++------
2 files changed, 87 insertions(+), 17 deletions(-)

--- a/include/asm-generic/qspinlock_types.h
+++ b/include/asm-generic/qspinlock_types.h
@@ -38,15 +38,20 @@ typedef struct qspinlock {
* Bitfields in the atomic value:
*
* 0- 7: locked byte
- * 8- 9: tail index
- * 10-31: tail cpu (+1)
+ * 8: pending
+ * 9-10: tail index
+ * 11-31: tail cpu (+1)
*/

#define _Q_LOCKED_OFFSET 0
#define _Q_LOCKED_BITS 8
#define _Q_LOCKED_MASK (((1U << _Q_LOCKED_BITS) - 1) << _Q_LOCKED_OFFSET)

-#define _Q_TAIL_IDX_OFFSET (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS)
+#define _Q_PENDING_OFFSET (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS)
+#define _Q_PENDING_BITS 1
+#define _Q_PENDING_MASK (((1U << _Q_PENDING_BITS) - 1) << _Q_PENDING_OFFSET)
+
+#define _Q_TAIL_IDX_OFFSET (_Q_PENDING_OFFSET + _Q_PENDING_BITS)
#define _Q_TAIL_IDX_BITS 2
#define _Q_TAIL_IDX_MASK (((1U << _Q_TAIL_IDX_BITS) - 1) << _Q_TAIL_IDX_OFFSET)

@@ -55,5 +60,6 @@ typedef struct qspinlock {
#define _Q_TAIL_CPU_MASK (((1U << _Q_TAIL_CPU_BITS) - 1) << _Q_TAIL_CPU_OFFSET)

#define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET)
+#define _Q_PENDING_VAL (1U << _Q_PENDING_OFFSET)

#endif /* __ASM_GENERIC_QSPINLOCK_TYPES_H */
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -83,6 +83,8 @@ static inline struct mcs_spinlock *decod
return per_cpu_ptr(&mcs_nodes[idx], cpu);
}

+#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
+
/**
* queue_spin_lock_slowpath - acquire the queue spinlock
* @lock: Pointer to queue spinlock structure
@@ -91,13 +93,16 @@ static inline struct mcs_spinlock *decod
*
* fast : slow : unlock
* : :
- * uncontended (0,0) --:--> (0,1) --------------------------------:--> (*,0)
- * : | ^--------. / :
- * : v \ | :
- * uncontended : (n,x) --+--> (n,0) | :
+ * uncontended (0,0,0) --:--> (0,0,1) ------------------------------:--> (*,*,0)
+ * : | ^--------.------. / :
+ * : v \ \ | :
+ * pending : (0,1,1) +--> (0,1,0) \ | :
+ * : | ^--' | | :
+ * : v | | :
+ * uncontended : (n,x,y) +--> (n,0,0) --' | :
* queue : | ^--' | :
* : v | :
- * contended : (*,x) --+--> (*,0) -----> (*,1) ---' :
+ * contended : (*,x,y) +--> (*,0,0) ---> (*,0,1) -' :
* queue : ^--' :
*
*/
@@ -109,6 +114,61 @@ void queue_spin_lock_slowpath(struct qsp

BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));

+ /*
+ * trylock || pending
+ *
+ * 0,0,0 -> 0,0,1 ; trylock
+ * 0,0,1 -> 0,1,1 ; pending
+ */
+ for (;;) {
+ /*
+ * If we observe any contention; queue.
+ */
+ if (val & ~_Q_LOCKED_MASK)
+ goto queue;
+
+ new = _Q_LOCKED_VAL;
+ if (val == new)
+ new |= _Q_PENDING_VAL;
+
+ old = atomic_cmpxchg(&lock->val, val, new);
+ if (old == val)
+ break;
+
+ val = old;
+ }
+
+ /*
+ * we won the trylock
+ */
+ if (new == _Q_LOCKED_VAL)
+ return;
+
+ /*
+ * we're pending, wait for the owner to go away.
+ *
+ * *,1,1 -> *,1,0
+ */
+ while ((val = atomic_read(&lock->val)) & _Q_LOCKED_MASK)
+ cpu_relax();
+
+ /*
+ * take ownership and clear the pending bit.
+ *
+ * *,1,0 -> *,0,1
+ */
+ for (;;) {
+ new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
+
+ old = atomic_cmpxchg(&lock->val, val, new);
+ if (old == val)
+ break;
+
+ val = old;
+ }
+ return;
+
+queue:
node = this_cpu_ptr(&mcs_nodes[0]);
idx = node->count++;
tail = encode_tail(smp_processor_id(), idx);
@@ -118,15 +178,18 @@ void queue_spin_lock_slowpath(struct qsp
node->next = NULL;

/*
+ * we already touched the queueing cacheline; don't bother with pending
+ * stuff.
+ *
* trylock || xchg(lock, node)
*
- * 0,0 -> 0,1 ; trylock
- * p,x -> n,x ; prev = xchg(lock, node)
+ * 0,0,0 -> 0,0,1 ; trylock
+ * p,y,x -> n,y,x ; prev = xchg(lock, node)
*/
for (;;) {
new = _Q_LOCKED_VAL;
if (val)
- new = tail | (val & _Q_LOCKED_MASK);
+ new = tail | (val & _Q_LOCKED_PENDING_MASK);

old = atomic_cmpxchg(&lock->val, val, new);
if (old == val)
@@ -144,7 +207,7 @@ void queue_spin_lock_slowpath(struct qsp
/*
* if there was a previous node; link it and wait.
*/
- if (old & ~_Q_LOCKED_MASK) {
+ if (old & ~_Q_LOCKED_PENDING_MASK) {
prev = decode_tail(old);
ACCESS_ONCE(prev->next) = node;

@@ -152,18 +215,19 @@ void queue_spin_lock_slowpath(struct qsp
}

/*
- * we're at the head of the waitqueue, wait for the owner to go away.
+ * we're at the head of the waitqueue, wait for the owner & pending to
+ * go away.
*
- * *,x -> *,0
+ * *,x,y -> *,0,0
*/
- while ((val = atomic_read(&lock->val)) & _Q_LOCKED_MASK)
+ while ((val = atomic_read(&lock->val)) & _Q_LOCKED_PENDING_MASK)
cpu_relax();

/*
* claim the lock:
*
- * n,0 -> 0,1 : lock, uncontended
- * *,0 -> *,1 : lock, contended
+ * n,0,0 -> 0,0,1 : lock, uncontended
+ * *,0,0 -> *,0,1 : lock, contended
*/
for (;;) {
new = _Q_LOCKED_VAL;


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/