[GIT PULL] locking changes for v4.1

From: Ingo Molnar
Date: Mon Apr 13 2015 - 02:46:49 EST


Linus,

Please pull the latest locking-core-for-linus git tree from:

git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-core-for-linus

# HEAD: 58995a9a5b292458f94a2356b8c878230fa56fe0 powerpc, jump_label: Include linux/jump_label.h to get HAVE_JUMP_LABEL define

Main changes:

- jump label asm preparatory work for PowerPC (Anton Blanchard)

- rwsem optimizations and cleanups (Davidlohr Bueso)

- mutex optimizations and cleanups (Jason Low)

- futex fix (Oleg Nesterov)

- remove broken atomicity checks from {READ,WRITE}_ONCE() (Peter Zijlstra)

Thanks,

Ingo

------------------>
Anton Blanchard (3):
jump_label: Allow asm/jump_label.h to be included in assembly
jump_label: Allow jump labels to be used in assembly
powerpc, jump_label: Include linux/jump_label.h to get HAVE_JUMP_LABEL define

Davidlohr Bueso (5):
locking/rwsem: Document barrier need when waking tasks
locking/rwsem: Set lock ownership ASAP
locking/rwsem: Avoid deceiving lock spinners
locking/rwsem: Check for active lock before bailing on spinning
locking: Remove ACCESS_ONCE() usage

Jason Low (4):
locking/mutex: In mutex_spin_on_owner(), return true when owner changes
locking/mutex: Refactor mutex_spin_on_owner()
locking/rwsem: Fix lock optimistic spinning when owner is not running
locking/mutex: Further simplify mutex_spin_on_owner()

Oleg Nesterov (1):
locking/futex: Check PF_KTHREAD rather than !p->mm to filter out kthreads

Peter Zijlstra (1):
locking: Remove atomicy checks from {READ,WRITE}_ONCE

Tom(JeHyeon) Yeon (1):
locking/rtmutex: Rename argument in the rt_mutex_adjust_prio_chain() documentation as well


Makefile | 1 +
arch/arm/include/asm/jump_label.h | 5 +-
arch/arm64/include/asm/jump_label.h | 8 +--
arch/mips/include/asm/jump_label.h | 7 +-
arch/powerpc/platforms/powernv/opal-wrappers.S | 2 +-
arch/powerpc/platforms/pseries/hvCall.S | 2 +-
arch/powerpc/platforms/pseries/lpar.c | 2 +-
arch/s390/include/asm/jump_label.h | 3 +
arch/sparc/include/asm/jump_label.h | 5 +-
arch/x86/include/asm/jump_label.h | 5 +-
include/linux/compiler.h | 16 -----
include/linux/jump_label.h | 21 ++++--
include/linux/seqlock.h | 6 +-
kernel/futex.c | 2 +-
kernel/locking/mcs_spinlock.h | 6 +-
kernel/locking/mutex.c | 51 ++++++--------
kernel/locking/osq_lock.c | 14 ++--
kernel/locking/rtmutex.c | 2 +-
kernel/locking/rwsem-spinlock.c | 7 ++
kernel/locking/rwsem-xadd.c | 98 +++++++++++++++-----------
kernel/locking/rwsem.c | 22 +-----
kernel/locking/rwsem.h | 20 ++++++
lib/lockref.c | 2 +-
23 files changed, 160 insertions(+), 147 deletions(-)
create mode 100644 kernel/locking/rwsem.h

diff --git a/Makefile b/Makefile
index 9fab639727c7..a2aa475543e9 100644
--- a/Makefile
+++ b/Makefile
@@ -779,6 +779,7 @@ KBUILD_ARFLAGS := $(call ar-option,D)
# check for 'asm goto'
ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC)), y)
KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO
+ KBUILD_AFLAGS += -DCC_HAVE_ASM_GOTO
endif

include $(srctree)/scripts/Makefile.kasan
diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h
index 70f9b9bfb1f9..5f337dc5c108 100644
--- a/arch/arm/include/asm/jump_label.h
+++ b/arch/arm/include/asm/jump_label.h
@@ -1,7 +1,7 @@
#ifndef _ASM_ARM_JUMP_LABEL_H
#define _ASM_ARM_JUMP_LABEL_H

-#ifdef __KERNEL__
+#ifndef __ASSEMBLY__

#include <linux/types.h>

@@ -27,8 +27,6 @@ static __always_inline bool arch_static_branch(struct static_key *key)
return true;
}

-#endif /* __KERNEL__ */
-
typedef u32 jump_label_t;

struct jump_entry {
@@ -37,4 +35,5 @@ struct jump_entry {
jump_label_t key;
};

+#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h
index 076a1c714049..c0e5165c2f76 100644
--- a/arch/arm64/include/asm/jump_label.h
+++ b/arch/arm64/include/asm/jump_label.h
@@ -18,11 +18,12 @@
*/
#ifndef __ASM_JUMP_LABEL_H
#define __ASM_JUMP_LABEL_H
+
+#ifndef __ASSEMBLY__
+
#include <linux/types.h>
#include <asm/insn.h>

-#ifdef __KERNEL__
-
#define JUMP_LABEL_NOP_SIZE AARCH64_INSN_SIZE

static __always_inline bool arch_static_branch(struct static_key *key)
@@ -39,8 +40,6 @@ static __always_inline bool arch_static_branch(struct static_key *key)
return true;
}

-#endif /* __KERNEL__ */
-
typedef u64 jump_label_t;

struct jump_entry {
@@ -49,4 +48,5 @@ struct jump_entry {
jump_label_t key;
};

+#endif /* __ASSEMBLY__ */
#endif /* __ASM_JUMP_LABEL_H */
diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h
index fdbff44e5482..608aa57799c8 100644
--- a/arch/mips/include/asm/jump_label.h
+++ b/arch/mips/include/asm/jump_label.h
@@ -8,9 +8,9 @@
#ifndef _ASM_MIPS_JUMP_LABEL_H
#define _ASM_MIPS_JUMP_LABEL_H

-#include <linux/types.h>
+#ifndef __ASSEMBLY__

-#ifdef __KERNEL__
+#include <linux/types.h>

#define JUMP_LABEL_NOP_SIZE 4

@@ -39,8 +39,6 @@ static __always_inline bool arch_static_branch(struct static_key *key)
return true;
}

-#endif /* __KERNEL__ */
-
#ifdef CONFIG_64BIT
typedef u64 jump_label_t;
#else
@@ -53,4 +51,5 @@ struct jump_entry {
jump_label_t key;
};

+#endif /* __ASSEMBLY__ */
#endif /* _ASM_MIPS_JUMP_LABEL_H */
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 0509bca5e830..fcbe899fe299 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -9,11 +9,11 @@
* 2 of the License, or (at your option) any later version.
*/

+#include <linux/jump_label.h>
#include <asm/ppc_asm.h>
#include <asm/hvcall.h>
#include <asm/asm-offsets.h>
#include <asm/opal.h>
-#include <asm/jump_label.h>

.section ".text"

diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
index ccd53f91e8aa..74b5b8e239c8 100644
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -7,12 +7,12 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
+#include <linux/jump_label.h>
#include <asm/hvcall.h>
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/ptrace.h>
-#include <asm/jump_label.h>

.section ".text"

diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index b5682fd6c984..b7a67e3d2201 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -26,7 +26,7 @@
#include <linux/dma-mapping.h>
#include <linux/console.h>
#include <linux/export.h>
-#include <linux/static_key.h>
+#include <linux/jump_label.h>
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/page.h>
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
index 58642fd29c87..2b77e235b5fb 100644
--- a/arch/s390/include/asm/jump_label.h
+++ b/arch/s390/include/asm/jump_label.h
@@ -1,6 +1,8 @@
#ifndef _ASM_S390_JUMP_LABEL_H
#define _ASM_S390_JUMP_LABEL_H

+#ifndef __ASSEMBLY__
+
#include <linux/types.h>

#define JUMP_LABEL_NOP_SIZE 6
@@ -39,4 +41,5 @@ struct jump_entry {
jump_label_t key;
};

+#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h
index ec2e2e2aba7d..cc9b04a2b11b 100644
--- a/arch/sparc/include/asm/jump_label.h
+++ b/arch/sparc/include/asm/jump_label.h
@@ -1,7 +1,7 @@
#ifndef _ASM_SPARC_JUMP_LABEL_H
#define _ASM_SPARC_JUMP_LABEL_H

-#ifdef __KERNEL__
+#ifndef __ASSEMBLY__

#include <linux/types.h>

@@ -22,8 +22,6 @@ static __always_inline bool arch_static_branch(struct static_key *key)
return true;
}

-#endif /* __KERNEL__ */
-
typedef u32 jump_label_t;

struct jump_entry {
@@ -32,4 +30,5 @@ struct jump_entry {
jump_label_t key;
};

+#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 6a2cefb4395a..a4c1cf7e93f8 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -1,7 +1,7 @@
#ifndef _ASM_X86_JUMP_LABEL_H
#define _ASM_X86_JUMP_LABEL_H

-#ifdef __KERNEL__
+#ifndef __ASSEMBLY__

#include <linux/stringify.h>
#include <linux/types.h>
@@ -30,8 +30,6 @@ static __always_inline bool arch_static_branch(struct static_key *key)
return true;
}

-#endif /* __KERNEL__ */
-
#ifdef CONFIG_X86_64
typedef u64 jump_label_t;
#else
@@ -44,4 +42,5 @@ struct jump_entry {
jump_label_t key;
};

+#endif /* __ASSEMBLY__ */
#endif
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 1b45e4a0519b..0e41ca0e5927 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -192,29 +192,16 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);

#include <uapi/linux/types.h>

-static __always_inline void data_access_exceeds_word_size(void)
-#ifdef __compiletime_warning
-__compiletime_warning("data access exceeds word size and won't be atomic")
-#endif
-;
-
-static __always_inline void data_access_exceeds_word_size(void)
-{
-}
-
static __always_inline void __read_once_size(const volatile void *p, void *res, int size)
{
switch (size) {
case 1: *(__u8 *)res = *(volatile __u8 *)p; break;
case 2: *(__u16 *)res = *(volatile __u16 *)p; break;
case 4: *(__u32 *)res = *(volatile __u32 *)p; break;
-#ifdef CONFIG_64BIT
case 8: *(__u64 *)res = *(volatile __u64 *)p; break;
-#endif
default:
barrier();
__builtin_memcpy((void *)res, (const void *)p, size);
- data_access_exceeds_word_size();
barrier();
}
}
@@ -225,13 +212,10 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
case 1: *(volatile __u8 *)p = *(__u8 *)res; break;
case 2: *(volatile __u16 *)p = *(__u16 *)res; break;
case 4: *(volatile __u32 *)p = *(__u32 *)res; break;
-#ifdef CONFIG_64BIT
case 8: *(volatile __u64 *)p = *(__u64 *)res; break;
-#endif
default:
barrier();
__builtin_memcpy((void *)p, (const void *)res, size);
- data_access_exceeds_word_size();
barrier();
}
}
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 98f923b6a0ea..f4de473f226b 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -45,6 +45,12 @@
* same as using STATIC_KEY_INIT_FALSE.
*/

+#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL)
+# define HAVE_JUMP_LABEL
+#endif
+
+#ifndef __ASSEMBLY__
+
#include <linux/types.h>
#include <linux/compiler.h>
#include <linux/bug.h>
@@ -55,7 +61,7 @@ extern bool static_key_initialized;
"%s used before call to jump_label_init", \
__func__)

-#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL)
+#ifdef HAVE_JUMP_LABEL

struct static_key {
atomic_t enabled;
@@ -66,13 +72,18 @@ struct static_key {
#endif
};

-# include <asm/jump_label.h>
-# define HAVE_JUMP_LABEL
#else
struct static_key {
atomic_t enabled;
};
-#endif /* CC_HAVE_ASM_GOTO && CONFIG_JUMP_LABEL */
+#endif /* HAVE_JUMP_LABEL */
+#endif /* __ASSEMBLY__ */
+
+#ifdef HAVE_JUMP_LABEL
+#include <asm/jump_label.h>
+#endif
+
+#ifndef __ASSEMBLY__

enum jump_label_type {
JUMP_LABEL_DISABLE = 0,
@@ -203,3 +214,5 @@ static inline bool static_key_enabled(struct static_key *key)
}

#endif /* _LINUX_JUMP_LABEL_H */
+
+#endif /* __ASSEMBLY__ */
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index f5df8f687b4d..5f68d0a391ce 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -108,7 +108,7 @@ static inline unsigned __read_seqcount_begin(const seqcount_t *s)
unsigned ret;

repeat:
- ret = ACCESS_ONCE(s->sequence);
+ ret = READ_ONCE(s->sequence);
if (unlikely(ret & 1)) {
cpu_relax();
goto repeat;
@@ -127,7 +127,7 @@ static inline unsigned __read_seqcount_begin(const seqcount_t *s)
*/
static inline unsigned raw_read_seqcount(const seqcount_t *s)
{
- unsigned ret = ACCESS_ONCE(s->sequence);
+ unsigned ret = READ_ONCE(s->sequence);
smp_rmb();
return ret;
}
@@ -179,7 +179,7 @@ static inline unsigned read_seqcount_begin(const seqcount_t *s)
*/
static inline unsigned raw_seqcount_begin(const seqcount_t *s)
{
- unsigned ret = ACCESS_ONCE(s->sequence);
+ unsigned ret = READ_ONCE(s->sequence);
smp_rmb();
return ret & ~1;
}
diff --git a/kernel/futex.c b/kernel/futex.c
index 2a5e3830e953..2579e407ff67 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -900,7 +900,7 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key,
if (!p)
return -ESRCH;

- if (!p->mm) {
+ if (unlikely(p->flags & PF_KTHREAD)) {
put_task_struct(p);
return -EPERM;
}
diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h
index d1fe2ba5bac9..75e114bdf3f2 100644
--- a/kernel/locking/mcs_spinlock.h
+++ b/kernel/locking/mcs_spinlock.h
@@ -78,7 +78,7 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
*/
return;
}
- ACCESS_ONCE(prev->next) = node;
+ WRITE_ONCE(prev->next, node);

/* Wait until the lock holder passes the lock down. */
arch_mcs_spin_lock_contended(&node->locked);
@@ -91,7 +91,7 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
static inline
void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
{
- struct mcs_spinlock *next = ACCESS_ONCE(node->next);
+ struct mcs_spinlock *next = READ_ONCE(node->next);

if (likely(!next)) {
/*
@@ -100,7 +100,7 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
if (likely(cmpxchg(lock, node, NULL) == node))
return;
/* Wait until the next pointer is set */
- while (!(next = ACCESS_ONCE(node->next)))
+ while (!(next = READ_ONCE(node->next)))
cpu_relax_lowlatency();
}

diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 94674e5919cb..4cccea6b8934 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -25,7 +25,7 @@
#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/debug_locks.h>
-#include "mcs_spinlock.h"
+#include <linux/osq_lock.h>

/*
* In the DEBUG case we are using the "NULL fastpath" for mutexes,
@@ -217,44 +217,35 @@ ww_mutex_set_context_slowpath(struct ww_mutex *lock,
}

#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
-static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
-{
- if (lock->owner != owner)
- return false;
-
- /*
- * Ensure we emit the owner->on_cpu, dereference _after_ checking
- * lock->owner still matches owner, if that fails, owner might
- * point to free()d memory, if it still matches, the rcu_read_lock()
- * ensures the memory stays valid.
- */
- barrier();
-
- return owner->on_cpu;
-}
-
/*
* Look out! "owner" is an entirely speculative pointer
* access and not reliable.
*/
static noinline
-int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
+bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
{
+ bool ret = true;
+
rcu_read_lock();
- while (owner_running(lock, owner)) {
- if (need_resched())
+ while (lock->owner == owner) {
+ /*
+ * Ensure we emit the owner->on_cpu, dereference _after_
+ * checking lock->owner still matches owner. If that fails,
+ * owner might point to freed memory. If it still matches,
+ * the rcu_read_lock() ensures the memory stays valid.
+ */
+ barrier();
+
+ if (!owner->on_cpu || need_resched()) {
+ ret = false;
break;
+ }

cpu_relax_lowlatency();
}
rcu_read_unlock();

- /*
- * We break out the loop above on need_resched() and when the
- * owner changed, which is a sign for heavy contention. Return
- * success only when lock->owner is NULL.
- */
- return lock->owner == NULL;
+ return ret;
}

/*
@@ -269,7 +260,7 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock)
return 0;

rcu_read_lock();
- owner = ACCESS_ONCE(lock->owner);
+ owner = READ_ONCE(lock->owner);
if (owner)
retval = owner->on_cpu;
rcu_read_unlock();
@@ -343,7 +334,7 @@ static bool mutex_optimistic_spin(struct mutex *lock,
* As such, when deadlock detection needs to be
* performed the optimistic spinning cannot be done.
*/
- if (ACCESS_ONCE(ww->ctx))
+ if (READ_ONCE(ww->ctx))
break;
}

@@ -351,7 +342,7 @@ static bool mutex_optimistic_spin(struct mutex *lock,
* If there's an owner, wait for it to either
* release the lock or go to sleep.
*/
- owner = ACCESS_ONCE(lock->owner);
+ owner = READ_ONCE(lock->owner);
if (owner && !mutex_spin_on_owner(lock, owner))
break;

@@ -490,7 +481,7 @@ static inline int __sched
__ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)
{
struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
- struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx);
+ struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx);

if (!hold_ctx)
return 0;
diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c
index c112d00341b0..dc85ee23a26f 100644
--- a/kernel/locking/osq_lock.c
+++ b/kernel/locking/osq_lock.c
@@ -98,7 +98,7 @@ bool osq_lock(struct optimistic_spin_queue *lock)

prev = decode_cpu(old);
node->prev = prev;
- ACCESS_ONCE(prev->next) = node;
+ WRITE_ONCE(prev->next, node);

/*
* Normally @prev is untouchable after the above store; because at that
@@ -109,7 +109,7 @@ bool osq_lock(struct optimistic_spin_queue *lock)
* cmpxchg in an attempt to undo our queueing.
*/

- while (!ACCESS_ONCE(node->locked)) {
+ while (!READ_ONCE(node->locked)) {
/*
* If we need to reschedule bail... so we can block.
*/
@@ -148,7 +148,7 @@ bool osq_lock(struct optimistic_spin_queue *lock)
* Or we race against a concurrent unqueue()'s step-B, in which
* case its step-C will write us a new @node->prev pointer.
*/
- prev = ACCESS_ONCE(node->prev);
+ prev = READ_ONCE(node->prev);
}

/*
@@ -170,8 +170,8 @@ bool osq_lock(struct optimistic_spin_queue *lock)
* it will wait in Step-A.
*/

- ACCESS_ONCE(next->prev) = prev;
- ACCESS_ONCE(prev->next) = next;
+ WRITE_ONCE(next->prev, prev);
+ WRITE_ONCE(prev->next, next);

return false;
}
@@ -193,11 +193,11 @@ void osq_unlock(struct optimistic_spin_queue *lock)
node = this_cpu_ptr(&osq_node);
next = xchg(&node->next, NULL);
if (next) {
- ACCESS_ONCE(next->locked) = 1;
+ WRITE_ONCE(next->locked, 1);
return;
}

next = osq_wait_next(lock, node, NULL);
if (next)
- ACCESS_ONCE(next->locked) = 1;
+ WRITE_ONCE(next->locked, 1);
}
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index e16e5542bf13..c0b8e9db6b2e 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -349,7 +349,7 @@ static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
*
* @task: the task owning the mutex (owner) for which a chain walk is
* probably needed
- * @deadlock_detect: do we have to carry out deadlock detection?
+ * @chwalk: do we have to carry out deadlock detection?
* @orig_lock: the mutex (can be NULL if we are walking the chain to recheck
* things for a task that has just got its priority adjusted, and
* is waiting on a mutex)
diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c
index 2555ae15ec14..3a5048572065 100644
--- a/kernel/locking/rwsem-spinlock.c
+++ b/kernel/locking/rwsem-spinlock.c
@@ -85,6 +85,13 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)

list_del(&waiter->list);
tsk = waiter->task;
+ /*
+ * Make sure we do not wakeup the next reader before
+ * setting the nil condition to grant the next reader;
+ * otherwise we could miss the wakeup on the other
+ * side and end up sleeping again. See the pairing
+ * in rwsem_down_read_failed().
+ */
smp_mb();
waiter->task = NULL;
wake_up_process(tsk);
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 2f7cc4076f50..3417d0172a5d 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -14,8 +14,9 @@
#include <linux/init.h>
#include <linux/export.h>
#include <linux/sched/rt.h>
+#include <linux/osq_lock.h>

-#include "mcs_spinlock.h"
+#include "rwsem.h"

/*
* Guide to the rw_semaphore's count field for common values.
@@ -186,6 +187,13 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
waiter = list_entry(next, struct rwsem_waiter, list);
next = waiter->list.next;
tsk = waiter->task;
+ /*
+ * Make sure we do not wakeup the next reader before
+ * setting the nil condition to grant the next reader;
+ * otherwise we could miss the wakeup on the other
+ * side and end up sleeping again. See the pairing
+ * in rwsem_down_read_failed().
+ */
smp_mb();
waiter->task = NULL;
wake_up_process(tsk);
@@ -258,6 +266,7 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) {
if (!list_is_singular(&sem->wait_list))
rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
+ rwsem_set_owner(sem);
return true;
}

@@ -270,15 +279,17 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
*/
static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
{
- long old, count = ACCESS_ONCE(sem->count);
+ long old, count = READ_ONCE(sem->count);

while (true) {
if (!(count == 0 || count == RWSEM_WAITING_BIAS))
return false;

old = cmpxchg(&sem->count, count, count + RWSEM_ACTIVE_WRITE_BIAS);
- if (old == count)
+ if (old == count) {
+ rwsem_set_owner(sem);
return true;
+ }

count = old;
}
@@ -287,60 +298,67 @@ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
{
struct task_struct *owner;
- bool on_cpu = false;
+ bool ret = true;

if (need_resched())
return false;

rcu_read_lock();
- owner = ACCESS_ONCE(sem->owner);
- if (owner)
- on_cpu = owner->on_cpu;
- rcu_read_unlock();
-
- /*
- * If sem->owner is not set, yet we have just recently entered the
- * slowpath, then there is a possibility reader(s) may have the lock.
- * To be safe, avoid spinning in these situations.
- */
- return on_cpu;
-}
-
-static inline bool owner_running(struct rw_semaphore *sem,
- struct task_struct *owner)
-{
- if (sem->owner != owner)
- return false;
-
- /*
- * Ensure we emit the owner->on_cpu, dereference _after_ checking
- * sem->owner still matches owner, if that fails, owner might
- * point to free()d memory, if it still matches, the rcu_read_lock()
- * ensures the memory stays valid.
- */
- barrier();
+ owner = READ_ONCE(sem->owner);
+ if (!owner) {
+ long count = READ_ONCE(sem->count);
+ /*
+ * If sem->owner is not set, yet we have just recently entered the
+ * slowpath with the lock being active, then there is a possibility
+ * reader(s) may have the lock. To be safe, bail spinning in these
+ * situations.
+ */
+ if (count & RWSEM_ACTIVE_MASK)
+ ret = false;
+ goto done;
+ }

- return owner->on_cpu;
+ ret = owner->on_cpu;
+done:
+ rcu_read_unlock();
+ return ret;
}

static noinline
bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner)
{
+ long count;
+
rcu_read_lock();
- while (owner_running(sem, owner)) {
- if (need_resched())
- break;
+ while (sem->owner == owner) {
+ /*
+ * Ensure we emit the owner->on_cpu, dereference _after_
+ * checking sem->owner still matches owner, if that fails,
+ * owner might point to free()d memory, if it still matches,
+ * the rcu_read_lock() ensures the memory stays valid.
+ */
+ barrier();
+
+ /* abort spinning when need_resched or owner is not running */
+ if (!owner->on_cpu || need_resched()) {
+ rcu_read_unlock();
+ return false;
+ }

cpu_relax_lowlatency();
}
rcu_read_unlock();

+ if (READ_ONCE(sem->owner))
+ return true; /* new owner, continue spinning */
+
/*
- * We break out the loop above on need_resched() or when the
- * owner changed, which is a sign for heavy contention. Return
- * success only when sem->owner is NULL.
+ * When the owner is not set, the lock could be free or
+ * held by readers. Check the counter to verify the
+ * state.
*/
- return sem->owner == NULL;
+ count = READ_ONCE(sem->count);
+ return (count == 0 || count == RWSEM_WAITING_BIAS);
}

static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
@@ -358,7 +376,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
goto done;

while (true) {
- owner = ACCESS_ONCE(sem->owner);
+ owner = READ_ONCE(sem->owner);
if (owner && !rwsem_spin_on_owner(sem, owner))
break;

@@ -432,7 +450,7 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)

/* we're now waiting on the lock, but no longer actively locking */
if (waiting) {
- count = ACCESS_ONCE(sem->count);
+ count = READ_ONCE(sem->count);

/*
* If there were already threads queued before us and there are
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index e2d3bc7f03b4..205be0ce34de 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -9,29 +9,9 @@
#include <linux/sched.h>
#include <linux/export.h>
#include <linux/rwsem.h>
-
#include <linux/atomic.h>

-#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
-static inline void rwsem_set_owner(struct rw_semaphore *sem)
-{
- sem->owner = current;
-}
-
-static inline void rwsem_clear_owner(struct rw_semaphore *sem)
-{
- sem->owner = NULL;
-}
-
-#else
-static inline void rwsem_set_owner(struct rw_semaphore *sem)
-{
-}
-
-static inline void rwsem_clear_owner(struct rw_semaphore *sem)
-{
-}
-#endif
+#include "rwsem.h"

/*
* lock for reading
diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h
new file mode 100644
index 000000000000..870ed9a5b426
--- /dev/null
+++ b/kernel/locking/rwsem.h
@@ -0,0 +1,20 @@
+#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
+static inline void rwsem_set_owner(struct rw_semaphore *sem)
+{
+ sem->owner = current;
+}
+
+static inline void rwsem_clear_owner(struct rw_semaphore *sem)
+{
+ sem->owner = NULL;
+}
+
+#else
+static inline void rwsem_set_owner(struct rw_semaphore *sem)
+{
+}
+
+static inline void rwsem_clear_owner(struct rw_semaphore *sem)
+{
+}
+#endif
diff --git a/lib/lockref.c b/lib/lockref.c
index ecb9a665ec19..494994bf17c8 100644
--- a/lib/lockref.c
+++ b/lib/lockref.c
@@ -18,7 +18,7 @@
#define CMPXCHG_LOOP(CODE, SUCCESS) do { \
struct lockref old; \
BUILD_BUG_ON(sizeof(old) != 8); \
- old.lock_count = ACCESS_ONCE(lockref->lock_count); \
+ old.lock_count = READ_ONCE(lockref->lock_count); \
while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) { \
struct lockref new = old, prev = old; \
CODE \
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/