[RFC][PATCH 2/5] mips/atomic: Fix loongson_llsc_mb() wreckage

From: Peter Zijlstra
Date: Wed Apr 24 2019 - 08:48:13 EST


The comment describing the loongson_llsc_mb() reorder case doesn't
make any sense whatsoever. Instruction re-ordering is not an SMP
artifact, but rather a CPU local phenomenon. This means that _every_
LL/SC loop needs this barrier right in front of it to keep the CPU
from leaking a memop into the loop.
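
To make the rule concrete, here is a minimal sketch of the shape every
LL/SC loop ends up with (illustrative only; example_atomic_add() and
its operands are made up for this mail, and the memory constraint is
simplified to "+m" where the kernel proper uses GCC_OFF_SMALL_ASM()):

	static inline void example_atomic_add(int i, atomic_t *v)
	{
		int temp;

		loongson_llsc_mb();	/* sync directly in front of the ll */
		__asm__ __volatile__(
		"1:	ll	%0, %1		# example_atomic_add	\n"
		"	addu	%0, %2					\n"
		"	sc	%0, %1					\n"
		"	beqz	%0, 1b					\n"
		: "=&r" (temp), "+m" (v->counter)
		: "Ir" (i));
	}

Note the barrier is unconditional: the reorder is CPU local, so UP
builds need it just as much as SMP builds do.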

For the branch speculation case: if futex_atomic_cmpxchg_inatomic()
needs one at the bne branch target, then surely the normal
__cmpxchg_asm() implementation does too. We cannot rely on the
barriers from cmpxchg(), because cmpxchg_local() is implemented with
the same macro, and branch prediction and speculation are likewise
CPU local.
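
For a cmpxchg-style loop the branch-target case looks like this (again
an illustrative sketch with an invented example_cmpxchg32(), not the
actual __cmpxchg_asm() macro, and "+m" standing in for
GCC_OFF_SMALL_ASM()):

	static inline unsigned int example_cmpxchg32(volatile unsigned int *p,
						     unsigned int old,
						     unsigned int new)
	{
		unsigned int ret;

		loongson_llsc_mb();	/* sync in front of the ll */
		__asm__ __volatile__(
		"	.set	push					\n"
		"	.set	noat					\n"
		"1:	ll	%0, %1		# example_cmpxchg32	\n"
		"	bne	%0, %z2, 2f				\n"
		"	move	$1, %z3					\n"
		"	sc	$1, %1					\n"
		"	beqz	$1, 1b					\n"
		"2:							\n"
		"	.set	pop					\n"
		: "=&r" (ret), "+m" (*p)
		: "Jr" (old), "Jr" (new)
		: "memory");
		loongson_llsc_mb();	/* covers the 2: branch target */

		return ret;
	}

The trailing barrier covers the speculated path through the bne to 2:,
and because speculation is CPU local it cannot be left to the
smp_llsc_mb() in cmpxchg() -- cmpxchg_local() takes the same path
without one.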

Fixes: e02e07e3127d ("MIPS: Loongson: Introduce and use loongson_llsc_mb()")
Cc: Huacai Chen <chenhc@xxxxxxxxxx>
Cc: Huang Pei <huangpei@xxxxxxxxxxx>
Cc: Paul Burton <paul.burton@xxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
arch/mips/include/asm/atomic.h | 24 ++++++++++++++++++++----
arch/mips/include/asm/barrier.h | 7 ++-----
arch/mips/include/asm/bitops.h | 5 +++++
arch/mips/include/asm/cmpxchg.h | 5 +++++
arch/mips/include/asm/local.h | 2 ++
arch/mips/kernel/syscall.c | 1 +
6 files changed, 35 insertions(+), 9 deletions(-)

--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -193,6 +193,7 @@ static __inline__ int atomic_sub_if_posi
if (kernel_uses_llsc) {
int temp;

+ loongson_llsc_mb();
__asm__ __volatile__(
" .set push \n"
" .set "MIPS_ISA_LEVEL" \n"
@@ -200,16 +201,23 @@ static __inline__ int atomic_sub_if_posi
" .set pop \n"
" subu %0, %1, %3 \n"
" move %1, %0 \n"
- " bltz %0, 1f \n"
+ " bltz %0, 2f \n"
" .set push \n"
" .set "MIPS_ISA_LEVEL" \n"
" sc %1, %2 \n"
"\t" __scbeqz " %1, 1b \n"
- "1: \n"
+ "2: \n"
" .set pop \n"
: "=&r" (result), "=&r" (temp),
"+" GCC_OFF_SMALL_ASM() (v->counter)
: "Ir" (i));
+
+ if (!IS_ENABLED(CONFIG_SMP))
+ loongson_llsc_mb();
+ /*
+ * Otherwise the loongson_llsc_mb() for the bltz target is
+ * implied by the smp_llsc_mb() below.
+ */
} else {
unsigned long flags;

@@ -395,20 +403,28 @@ static __inline__ long atomic64_sub_if_p
if (kernel_uses_llsc) {
long temp;

+ loongson_llsc_mb();
__asm__ __volatile__(
" .set push \n"
" .set "MIPS_ISA_LEVEL" \n"
"1: lld %1, %2 # atomic64_sub_if_positive\n"
" dsubu %0, %1, %3 \n"
" move %1, %0 \n"
- " bltz %0, 1f \n"
+ " bltz %0, 2f \n"
" scd %1, %2 \n"
"\t" __scbeqz " %1, 1b \n"
- "1: \n"
+ "2: \n"
" .set pop \n"
: "=&r" (result), "=&r" (temp),
"+" GCC_OFF_SMALL_ASM() (v->counter)
: "Ir" (i));
+
+ if (!IS_ENABLED(CONFIG_SMP))
+ loongson_llsc_mb();
+ /*
+ * Otherwise the loongson_llsc_mb() for the bltz target is
+ * implied by the smp_llsc_mb() below.
+ */
} else {
unsigned long flags;

--- a/arch/mips/include/asm/barrier.h
+++ b/arch/mips/include/asm/barrier.h
@@ -248,10 +248,7 @@
*
* In order to avoid this we need to place a memory barrier (ie. a sync
* instruction) prior to every ll instruction, in between it & any earlier
- * memory access instructions. Many of these cases are already covered by
- * smp_mb__before_llsc() but for the remaining cases, typically ones in
- * which multiple CPUs may operate on a memory location but ordering is not
- * usually guaranteed, we use loongson_llsc_mb() below.
+ * memory access instructions.
*
* This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later.
*
@@ -267,7 +264,7 @@
* This case affects all current Loongson 3 CPUs.
*/
#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS /* Loongson-3's LLSC workaround */
-#define loongson_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
+#define loongson_llsc_mb() __asm__ __volatile__("sync" : : :"memory")
#else
#define loongson_llsc_mb() do { } while (0)
#endif
--- a/arch/mips/include/asm/bitops.h
+++ b/arch/mips/include/asm/bitops.h
@@ -249,6 +249,7 @@ static inline int test_and_set_bit(unsig
unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
unsigned long temp;

+ loongson_llsc_mb();
do {
__asm__ __volatile__(
" .set push \n"
@@ -305,6 +306,7 @@ static inline int test_and_set_bit_lock(
unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
unsigned long temp;

+ loongson_llsc_mb();
do {
__asm__ __volatile__(
" .set push \n"
@@ -364,6 +366,7 @@ static inline int test_and_clear_bit(uns
unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
unsigned long temp;

+ loongson_llsc_mb();
do {
__asm__ __volatile__(
" " __LL "%0, %1 # test_and_clear_bit \n"
@@ -379,6 +382,7 @@ static inline int test_and_clear_bit(uns
unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
unsigned long temp;

+ loongson_llsc_mb();
do {
__asm__ __volatile__(
" .set push \n"
@@ -438,6 +442,7 @@ static inline int test_and_change_bit(un
unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
unsigned long temp;

+ loongson_llsc_mb();
do {
__asm__ __volatile__(
" .set push \n"
--- a/arch/mips/include/asm/cmpxchg.h
+++ b/arch/mips/include/asm/cmpxchg.h
@@ -46,6 +46,7 @@ extern unsigned long __xchg_called_with_
__typeof(*(m)) __ret; \
\
if (kernel_uses_llsc) { \
+ loongson_llsc_mb(); \
__asm__ __volatile__( \
" .set push \n" \
" .set noat \n" \
@@ -117,6 +118,7 @@ static inline unsigned long __xchg(volat
__typeof(*(m)) __ret; \
\
if (kernel_uses_llsc) { \
+ loongson_llsc_mb(); \
__asm__ __volatile__( \
" .set push \n" \
" .set noat \n" \
@@ -134,6 +136,7 @@ static inline unsigned long __xchg(volat
: "=&r" (__ret), "=" GCC_OFF_SMALL_ASM() (*m) \
: GCC_OFF_SMALL_ASM() (*m), "Jr" (old), "Jr" (new) \
: "memory"); \
+ loongson_llsc_mb(); \
} else { \
unsigned long __flags; \
\
@@ -229,6 +232,7 @@ static inline unsigned long __cmpxchg64(
*/
local_irq_save(flags);

+ loongson_llsc_mb();
asm volatile(
" .set push \n"
" .set " MIPS_ISA_ARCH_LEVEL " \n"
@@ -274,6 +278,7 @@ static inline unsigned long __cmpxchg64(
"r" (old),
"r" (new)
: "memory");
+ loongson_llsc_mb();

local_irq_restore(flags);
return ret;
--- a/arch/mips/include/asm/local.h
+++ b/arch/mips/include/asm/local.h
@@ -49,6 +49,7 @@ static __inline__ long local_add_return(
} else if (kernel_uses_llsc) {
unsigned long temp;

+ loongson_llsc_mb();
__asm__ __volatile__(
" .set push \n"
" .set "MIPS_ISA_ARCH_LEVEL" \n"
@@ -96,6 +97,7 @@ static __inline__ long local_sub_return(
} else if (kernel_uses_llsc) {
unsigned long temp;

+ loongson_llsc_mb();
__asm__ __volatile__(
" .set push \n"
" .set "MIPS_ISA_ARCH_LEVEL" \n"
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -132,6 +132,7 @@ static inline int mips_atomic_set(unsign
[efault] "i" (-EFAULT)
: "memory");
} else if (cpu_has_llsc) {
+ loongson_llsc_mb();
__asm__ __volatile__ (
" .set push \n"
" .set "MIPS_ISA_ARCH_LEVEL" \n"