[tip:locking/core] locking/mutex: Optimize mutex_trylock() fast-path
From: tip-bot for Peter Zijlstra
Date: Wed Jun 08 2016 - 10:27:35 EST
Commit-ID: 6428671bae97caa7040e24e79e969fd87908f4f3
Gitweb: http://git.kernel.org/tip/6428671bae97caa7040e24e79e969fd87908f4f3
Author: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
AuthorDate: Wed, 1 Jun 2016 20:58:15 +0200
Committer: Ingo Molnar <mingo@xxxxxxxxxx>
CommitDate: Wed, 8 Jun 2016 15:17:01 +0200
locking/mutex: Optimize mutex_trylock() fast-path
A while back Viro posted a number of 'interesting' mutex_is_locked()
users on IRC, one of those was RCU.
RCU seems to use mutex_is_locked() to avoid doing mutex_trylock(), the
regular load-before-modify pattern.
While the use isn't wrong per se, it's curious in that it's needed at all,
mutex_trylock() should be good enough on its own to avoid the pointless
cacheline bounces.
So fix the mutex_trylock() fast paths to read the count before attempting
the atomic operation, and remove the mutex_is_locked() (ab)use from RCU.
Reported-by: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Acked-by: Paul McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
Acked-by: Davidlohr Bueso <dave@xxxxxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Waiman Long <Waiman.Long@xxxxxxx>
Link: http://lkml.kernel.org/r/20160601185815.GW3190@xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
---
arch/ia64/include/asm/mutex.h | 2 +-
arch/powerpc/include/asm/mutex.h | 2 +-
arch/x86/include/asm/mutex_32.h | 2 +-
arch/x86/include/asm/mutex_64.h | 6 +++---
include/asm-generic/mutex-dec.h | 2 +-
include/asm-generic/mutex-xchg.h | 6 +++++-
kernel/rcu/tree.c | 1 -
7 files changed, 12 insertions(+), 9 deletions(-)
diff --git a/arch/ia64/include/asm/mutex.h b/arch/ia64/include/asm/mutex.h
index f41e66d..28cb819 100644
--- a/arch/ia64/include/asm/mutex.h
+++ b/arch/ia64/include/asm/mutex.h
@@ -82,7 +82,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
static inline int
__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
{
- if (cmpxchg_acq(count, 1, 0) == 1)
+ if (atomic_read(count) == 1 && cmpxchg_acq(count, 1, 0) == 1)
return 1;
return 0;
}
diff --git a/arch/powerpc/include/asm/mutex.h b/arch/powerpc/include/asm/mutex.h
index 127ab23..078155f 100644
--- a/arch/powerpc/include/asm/mutex.h
+++ b/arch/powerpc/include/asm/mutex.h
@@ -124,7 +124,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
static inline int
__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
{
- if (likely(__mutex_cmpxchg_lock(count, 1, 0) == 1))
+ if (likely(atomic_read(count) == 1 && __mutex_cmpxchg_lock(count, 1, 0) == 1))
return 1;
return 0;
}
diff --git a/arch/x86/include/asm/mutex_32.h b/arch/x86/include/asm/mutex_32.h
index 85e6cda..e9355a8 100644
--- a/arch/x86/include/asm/mutex_32.h
+++ b/arch/x86/include/asm/mutex_32.h
@@ -101,7 +101,7 @@ static inline int __mutex_fastpath_trylock(atomic_t *count,
int (*fail_fn)(atomic_t *))
{
/* cmpxchg because it never induces a false contention state. */
- if (likely(atomic_cmpxchg(count, 1, 0) == 1))
+ if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1))
return 1;
return 0;
diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h
index 07537a4..d985075 100644
--- a/arch/x86/include/asm/mutex_64.h
+++ b/arch/x86/include/asm/mutex_64.h
@@ -118,10 +118,10 @@ do { \
static inline int __mutex_fastpath_trylock(atomic_t *count,
int (*fail_fn)(atomic_t *))
{
- if (likely(atomic_cmpxchg(count, 1, 0) == 1))
+ if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1))
return 1;
- else
- return 0;
+
+ return 0;
}
#endif /* _ASM_X86_MUTEX_64_H */
diff --git a/include/asm-generic/mutex-dec.h b/include/asm-generic/mutex-dec.h
index fd694cf..c54829d 100644
--- a/include/asm-generic/mutex-dec.h
+++ b/include/asm-generic/mutex-dec.h
@@ -80,7 +80,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
static inline int
__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
{
- if (likely(atomic_cmpxchg_acquire(count, 1, 0) == 1))
+ if (likely(atomic_read(count) == 1 && atomic_cmpxchg_acquire(count, 1, 0) == 1))
return 1;
return 0;
}
diff --git a/include/asm-generic/mutex-xchg.h b/include/asm-generic/mutex-xchg.h
index a6b4a7b..3269ec4 100644
--- a/include/asm-generic/mutex-xchg.h
+++ b/include/asm-generic/mutex-xchg.h
@@ -91,8 +91,12 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
static inline int
__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
{
- int prev = atomic_xchg_acquire(count, 0);
+ int prev;
+ if (atomic_read(count) != 1)
+ return 0;
+
+ prev = atomic_xchg_acquire(count, 0);
if (unlikely(prev < 0)) {
/*
* The lock was marked contended so we must restore that
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index c7f1bc4..b732689 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3681,7 +3681,6 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s) &&
(rnp == rnp_root ||
ULONG_CMP_LT(READ_ONCE(rnp_root->exp_seq_rq), s)) &&
- !mutex_is_locked(&rsp->exp_mutex) &&
mutex_trylock(&rsp->exp_mutex))
goto fastpath;