[PATCH v7 1/5] locking/qspinlock: relaxes cmpxchg & xchg ops in native code

From: Waiman Long
Date: Tue Sep 22 2015 - 16:52:22 EST

Next message: Waiman Long: "[PATCH v7 2/5] locking/pvqspinlock, x86: Optimize PV unlock code path"
Previous message: Waiman Long: "[PATCH v7 3/5] locking/pvqspinlock: Collect slowpath lock statistics"
In reply to: Waiman Long: "[PATCH v7 3/5] locking/pvqspinlock: Collect slowpath lock statistics"
Next in thread: Waiman Long: "[PATCH v7 2/5] locking/pvqspinlock, x86: Optimize PV unlock code path"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

This patch replaces the cmpxchg() and xchg() calls in the native
qspinlock code with more relaxed versions of those calls to enable
other architectures to adopt queued spinlocks with less performance
overhead.

Signed-off-by: Waiman Long <Waiman.Long@xxxxxxx>
---
include/asm-generic/qspinlock.h | 9 +++++----
kernel/locking/qspinlock.c | 24 +++++++++++++++++++-----
2 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index e2aadbc..39e1cb2 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -12,8 +12,9 @@
* GNU General Public License for more details.
*
* (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
+ * (C) Copyright 2015 Hewlett-Packard Enterprise Development LP
*
- * Authors: Waiman Long <waiman.long@xxxxxx>
+ * Authors: Waiman Long <waiman.long@xxxxxxx>
*/
#ifndef __ASM_GENERIC_QSPINLOCK_H
#define __ASM_GENERIC_QSPINLOCK_H
@@ -62,7 +63,7 @@ static __always_inline int queued_spin_is_contended(struct qspinlock *lock)
static __always_inline int queued_spin_trylock(struct qspinlock *lock)
{
if (!atomic_read(&lock->val) &&
- (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) == 0))
+ (atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL) == 0))
return 1;
return 0;
}
@@ -77,7 +78,7 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock)
{
u32 val;

- val = atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL);
+ val = atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL);
if (likely(val == 0))
return;
queued_spin_lock_slowpath(lock, val);
@@ -93,7 +94,7 @@ static __always_inline void queued_spin_unlock(struct qspinlock *lock)
/*
* smp_mb__before_atomic() in order to guarantee release semantics
*/
- smp_mb__before_atomic_dec();
+ smp_mb__before_atomic();
atomic_sub(_Q_LOCKED_VAL, &lock->val);
}
#endif
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 87e9ce6..d2f3fda 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -14,8 +14,9 @@
* (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
* (C) Copyright 2013-2014 Red Hat, Inc.
* (C) Copyright 2015 Intel Corp.
+ * (C) Copyright 2015 Hewlett-Packard Enterprise Development LP
*
- * Authors: Waiman Long <waiman.long@xxxxxx>
+ * Authors: Waiman Long <waiman.long@xxxxxxx>
* Peter Zijlstra <peterz@xxxxxxxxxxxxx>
*/

@@ -176,7 +177,12 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
{
struct __qspinlock *l = (void *)lock;

- return (u32)xchg(&l->tail, tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
+ /*
+ * Use release semantics to make sure that the MCS node is properly
+ * initialized before changing the tail code.
+ */
+ return (u32)xchg_release(&l->tail,
+ tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
}

#else /* _Q_PENDING_BITS == 8 */
@@ -208,7 +214,11 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)

for (;;) {
new = (val & _Q_LOCKED_PENDING_MASK) | tail;
- old = atomic_cmpxchg(&lock->val, val, new);
+ /*
+ * Use release semantics to make sure that the MCS node is
+ * properly initialized before changing the tail code.
+ */
+ old = atomic_cmpxchg_release(&lock->val, val, new);
if (old == val)
break;

@@ -319,7 +329,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
if (val == new)
new |= _Q_PENDING_VAL;

- old = atomic_cmpxchg(&lock->val, val, new);
+ old = atomic_cmpxchg_acquire(&lock->val, val, new);
if (old == val)
break;

@@ -426,7 +436,11 @@ queue:
set_locked(lock);
break;
}
- old = atomic_cmpxchg(&lock->val, val, _Q_LOCKED_VAL);
+ /*
+ * The smp_load_acquire() call above has provided the necessary
+ * acquire semantics required for locking.
+ */
+ old = atomic_cmpxchg_relaxed(&lock->val, val, _Q_LOCKED_VAL);
if (old == val)
goto release; /* No contention */

--
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Next message: Waiman Long: "[PATCH v7 2/5] locking/pvqspinlock, x86: Optimize PV unlock code path"
Previous message: Waiman Long: "[PATCH v7 3/5] locking/pvqspinlock: Collect slowpath lock statistics"
In reply to: Waiman Long: "[PATCH v7 3/5] locking/pvqspinlock: Collect slowpath lock statistics"
Next in thread: Waiman Long: "[PATCH v7 2/5] locking/pvqspinlock, x86: Optimize PV unlock code path"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]