[PATCH v2 5/6] ARCv2: spinlock/rwlock/atomics: Delayed retry of failed SCOND with exponential backoff

From: Vineet Gupta
Date: Mon Aug 03 2015 - 10:37:43 EST


This is to workaround the llock/scond livelock

HS38x4 could get into a LLOCK/SCOND livelock in case of multiple overlapping
coherency transactions in the SCU. The exclusive line state keeps rotating
among contenting cores leading to a never ending cycle. So break the cycle
by deferring the retry of failed exclusive access (SCOND). The actual delay
needed is function of number of contending cores as well as the unrelated
coherency traffic from other cores. To keep the code simple, start off with
small delay of 1 which would suffice most cases and in case of contention
double the delay. Eventually the delay is sufficient such that the coherency
pipeline is drained, thus a subsequent exclusive access would succeed.

Cc: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Signed-off-by: Vineet Gupta <vgupta@xxxxxxxxxxxx>
---
arch/arc/Kconfig | 5 +
arch/arc/include/asm/atomic.h | 49 ++++++-
arch/arc/include/asm/spinlock.h | 293 ++++++++++++++++++++++++++++++++++++++++
arch/arc/kernel/setup.c | 4 +
4 files changed, 347 insertions(+), 4 deletions(-)

diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index a5fccdfbfc8f..bd4670d1b89b 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -365,6 +365,11 @@ config ARC_HAS_LLSC
default y
depends on !ARC_CANT_LLSC

+config ARC_STAR_9000923308
+ bool "Workaround for llock/scond livelock"
+ default y
+ depends on ISA_ARCV2 && SMP && ARC_HAS_LLSC
+
config ARC_HAS_SWAPE
bool "Insn: SWAPE (endian-swap)"
default y
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 3dd36c1efee1..629dfd0a0c6b 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -23,17 +23,51 @@

#define atomic_set(v, i) (((v)->counter) = (i))

+#ifdef CONFIG_ARC_STAR_9000923308
+
+#define SCOND_FAIL_RETRY_VAR_DEF \
+ unsigned int delay = 1, tmp; \
+
+#define SCOND_FAIL_RETRY_ASM \
+ " bz 4f \n" \
+ " ; --- scond fail delay --- \n" \
+ " mov %[tmp], %[delay] \n" /* tmp = delay */ \
+ "2: brne.d %[tmp], 0, 2b \n" /* while (tmp != 0) */ \
+ " sub %[tmp], %[tmp], 1 \n" /* tmp-- */ \
+ " asl.f %[delay], %[delay], 1 \n" /* delay *= 2 */ \
+ " mov.z %[delay], 1 \n" /* handle overflow */ \
+ " b 1b \n" /* start over */ \
+ "4: ; --- success --- \n" \
+
+#define SCOND_FAIL_RETRY_VARS \
+ ,[delay] "+&r" (delay),[tmp] "=&r" (tmp) \
+
+#else /* !CONFIG_ARC_STAR_9000923308 */
+
+#define SCOND_FAIL_RETRY_VAR_DEF
+
+#define SCOND_FAIL_RETRY_ASM \
+ " bnz 1b \n" \
+
+#define SCOND_FAIL_RETRY_VARS
+
+#endif
+
#define ATOMIC_OP(op, c_op, asm_op) \
static inline void atomic_##op(int i, atomic_t *v) \
{ \
- unsigned int val; \
+ unsigned int val; \
+ SCOND_FAIL_RETRY_VAR_DEF \
\
__asm__ __volatile__( \
"1: llock %[val], [%[ctr]] \n" \
" " #asm_op " %[val], %[val], %[i] \n" \
" scond %[val], [%[ctr]] \n" \
- " bnz 1b \n" \
+ " \n" \
+ SCOND_FAIL_RETRY_ASM \
+ \
: [val] "=&r" (val) /* Early clobber to prevent reg reuse */ \
+ SCOND_FAIL_RETRY_VARS \
: [ctr] "r" (&v->counter), /* Not "m": llock only supports reg direct addr mode */ \
[i] "ir" (i) \
: "cc"); \
@@ -42,7 +76,8 @@ static inline void atomic_##op(int i, atomic_t *v) \
#define ATOMIC_OP_RETURN(op, c_op, asm_op) \
static inline int atomic_##op##_return(int i, atomic_t *v) \
{ \
- unsigned int val; \
+ unsigned int val; \
+ SCOND_FAIL_RETRY_VAR_DEF \
\
/* \
* Explicit full memory barrier needed before/after as \
@@ -54,8 +89,11 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
"1: llock %[val], [%[ctr]] \n" \
" " #asm_op " %[val], %[val], %[i] \n" \
" scond %[val], [%[ctr]] \n" \
- " bnz 1b \n" \
+ " \n" \
+ SCOND_FAIL_RETRY_ASM \
+ \
: [val] "=&r" (val) \
+ SCOND_FAIL_RETRY_VARS \
: [ctr] "r" (&v->counter), \
[i] "ir" (i) \
: "cc"); \
@@ -142,6 +180,9 @@ ATOMIC_OP(and, &=, and)
#undef ATOMIC_OPS
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
+#undef SCOND_FAIL_RETRY_VAR_DEF
+#undef SCOND_FAIL_RETRY_ASM
+#undef SCOND_FAIL_RETRY_VARS

/**
* __atomic_add_unless - add unless the number is a given value
diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h
index 9fd5a0221671..2a84525b5aa4 100644
--- a/arch/arc/include/asm/spinlock.h
+++ b/arch/arc/include/asm/spinlock.h
@@ -20,6 +20,11 @@

#ifdef CONFIG_ARC_HAS_LLSC

+/*
+ * A normal LLOCK/SCOND based system, w/o need for livelock workaround
+ */
+#ifndef CONFIG_ARC_STAR_9000923308
+
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
unsigned int val;
@@ -233,6 +238,294 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
smp_mb();
}

+#else /* CONFIG_ARC_STAR_9000923308 */
+
+/*
+ * HS38x4 could get into a LLOCK/SCOND livelock in case of multiple overlapping
+ * coherency transactions in the SCU. The exclusive line state keeps rotating
+ * among contenting cores leading to a never ending cycle. So break the cycle
+ * by deferring the retry of failed exclusive access (SCOND). The actual delay
+ * needed is function of number of contending cores as well as the unrelated
+ * coherency traffic from other cores. To keep the code simple, start off with
+ * small delay of 1 which would suffice most cases and in case of contention
+ * double the delay. Eventually the delay is sufficient such that the coherency
+ * pipeline is drained, thus a subsequent exclusive access would succeed.
+ */
+
+#define SCOND_FAIL_RETRY_VAR_DEF \
+ unsigned int delay, tmp; \
+
+#define SCOND_FAIL_RETRY_ASM \
+ " ; --- scond fail delay --- \n" \
+ " mov %[tmp], %[delay] \n" /* tmp = delay */ \
+ "2: brne.d %[tmp], 0, 2b \n" /* while (tmp != 0) */ \
+ " sub %[tmp], %[tmp], 1 \n" /* tmp-- */ \
+ " asl.f %[delay], %[delay], 1 \n" /* delay *= 2 */ \
+ " mov.z %[delay], 1 \n" /* handle overflow */ \
+ " b 1b \n" /* start over */ \
+ " \n" \
+ "4: ; --- done --- \n" \
+
+#define SCOND_FAIL_RETRY_VARS \
+ ,[delay] "=&r" (delay), [tmp] "=&r" (tmp) \
+
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+ unsigned int val;
+ SCOND_FAIL_RETRY_VAR_DEF;
+
+ smp_mb();
+
+ __asm__ __volatile__(
+ "0: mov %[delay], 1 \n"
+ "1: llock %[val], [%[slock]] \n"
+ " breq %[val], %[LOCKED], 1b \n" /* spin while LOCKED */
+ " scond %[LOCKED], [%[slock]] \n" /* acquire */
+ " bz 4f \n" /* done */
+ " \n"
+ SCOND_FAIL_RETRY_ASM
+
+ : [val] "=&r" (val)
+ SCOND_FAIL_RETRY_VARS
+ : [slock] "r" (&(lock->slock)),
+ [LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__)
+ : "memory", "cc");
+
+ smp_mb();
+}
+
+/* 1 - lock taken successfully */
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
+{
+ unsigned int val, got_it = 0;
+ SCOND_FAIL_RETRY_VAR_DEF;
+
+ smp_mb();
+
+ __asm__ __volatile__(
+ "0: mov %[delay], 1 \n"
+ "1: llock %[val], [%[slock]] \n"
+ " breq %[val], %[LOCKED], 4f \n" /* already LOCKED, just bail */
+ " scond %[LOCKED], [%[slock]] \n" /* acquire */
+ " bz.d 4f \n"
+ " mov.z %[got_it], 1 \n" /* got it */
+ " \n"
+ SCOND_FAIL_RETRY_ASM
+
+ : [val] "=&r" (val),
+ [got_it] "+&r" (got_it)
+ SCOND_FAIL_RETRY_VARS
+ : [slock] "r" (&(lock->slock)),
+ [LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__)
+ : "memory", "cc");
+
+ smp_mb();
+
+ return got_it;
+}
+
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+ smp_mb();
+
+ lock->slock = __ARCH_SPIN_LOCK_UNLOCKED__;
+
+ smp_mb();
+}
+
+/*
+ * Read-write spinlocks, allowing multiple readers but only one writer.
+ * Unfair locking as Writers could be starved indefinitely by Reader(s)
+ */
+
+static inline void arch_read_lock(arch_rwlock_t *rw)
+{
+ unsigned int val;
+ SCOND_FAIL_RETRY_VAR_DEF;
+
+ smp_mb();
+
+ /*
+ * zero means writer holds the lock exclusively, deny Reader.
+ * Otherwise grant lock to first/subseq reader
+ *
+ * if (rw->counter > 0) {
+ * rw->counter--;
+ * ret = 1;
+ * }
+ */
+
+ __asm__ __volatile__(
+ "0: mov %[delay], 1 \n"
+ "1: llock %[val], [%[rwlock]] \n"
+ " brls %[val], %[WR_LOCKED], 1b\n" /* <= 0: spin while write locked */
+ " sub %[val], %[val], 1 \n" /* reader lock */
+ " scond %[val], [%[rwlock]] \n"
+ " bz 4f \n" /* done */
+ " \n"
+ SCOND_FAIL_RETRY_ASM
+
+ : [val] "=&r" (val)
+ SCOND_FAIL_RETRY_VARS
+ : [rwlock] "r" (&(rw->counter)),
+ [WR_LOCKED] "ir" (0)
+ : "memory", "cc");
+
+ smp_mb();
+}
+
+/* 1 - lock taken successfully */
+static inline int arch_read_trylock(arch_rwlock_t *rw)
+{
+ unsigned int val, got_it = 0;
+ SCOND_FAIL_RETRY_VAR_DEF;
+
+ smp_mb();
+
+ __asm__ __volatile__(
+ "0: mov %[delay], 1 \n"
+ "1: llock %[val], [%[rwlock]] \n"
+ " brls %[val], %[WR_LOCKED], 4f\n" /* <= 0: already write locked, bail */
+ " sub %[val], %[val], 1 \n" /* counter-- */
+ " scond %[val], [%[rwlock]] \n"
+ " bz.d 4f \n"
+ " mov.z %[got_it], 1 \n" /* got it */
+ " \n"
+ SCOND_FAIL_RETRY_ASM
+
+ : [val] "=&r" (val),
+ [got_it] "+&r" (got_it)
+ SCOND_FAIL_RETRY_VARS
+ : [rwlock] "r" (&(rw->counter)),
+ [WR_LOCKED] "ir" (0)
+ : "memory", "cc");
+
+ smp_mb();
+
+ return got_it;
+}
+
+static inline void arch_write_lock(arch_rwlock_t *rw)
+{
+ unsigned int val;
+ SCOND_FAIL_RETRY_VAR_DEF;
+
+ smp_mb();
+
+ /*
+ * If reader(s) hold lock (lock < __ARCH_RW_LOCK_UNLOCKED__),
+ * deny writer. Otherwise if unlocked grant to writer
+ * Hence the claim that Linux rwlocks are unfair to writers.
+ * (can be starved for an indefinite time by readers).
+ *
+ * if (rw->counter == __ARCH_RW_LOCK_UNLOCKED__) {
+ * rw->counter = 0;
+ * ret = 1;
+ * }
+ */
+
+ __asm__ __volatile__(
+ "0: mov %[delay], 1 \n"
+ "1: llock %[val], [%[rwlock]] \n"
+ " brne %[val], %[UNLOCKED], 1b \n" /* while !UNLOCKED spin */
+ " mov %[val], %[WR_LOCKED] \n"
+ " scond %[val], [%[rwlock]] \n"
+ " bz 4f \n"
+ " \n"
+ SCOND_FAIL_RETRY_ASM
+
+ : [val] "=&r" (val)
+ SCOND_FAIL_RETRY_VARS
+ : [rwlock] "r" (&(rw->counter)),
+ [UNLOCKED] "ir" (__ARCH_RW_LOCK_UNLOCKED__),
+ [WR_LOCKED] "ir" (0)
+ : "memory", "cc");
+
+ smp_mb();
+}
+
+/* 1 - lock taken successfully */
+static inline int arch_write_trylock(arch_rwlock_t *rw)
+{
+ unsigned int val, got_it = 0;
+ SCOND_FAIL_RETRY_VAR_DEF;
+
+ smp_mb();
+
+ __asm__ __volatile__(
+ "0: mov %[delay], 1 \n"
+ "1: llock %[val], [%[rwlock]] \n"
+ " brne %[val], %[UNLOCKED], 4f \n" /* !UNLOCKED, bail */
+ " mov %[val], %[WR_LOCKED] \n"
+ " scond %[val], [%[rwlock]] \n"
+ " bz.d 4f \n"
+ " mov.z %[got_it], 1 \n" /* got it */
+ " \n"
+ SCOND_FAIL_RETRY_ASM
+
+ : [val] "=&r" (val),
+ [got_it] "+&r" (got_it)
+ SCOND_FAIL_RETRY_VARS
+ : [rwlock] "r" (&(rw->counter)),
+ [UNLOCKED] "ir" (__ARCH_RW_LOCK_UNLOCKED__),
+ [WR_LOCKED] "ir" (0)
+ : "memory", "cc");
+
+ smp_mb();
+
+ return got_it;
+}
+
+static inline void arch_read_unlock(arch_rwlock_t *rw)
+{
+ unsigned int val;
+
+ smp_mb();
+
+ /*
+ * rw->counter++;
+ */
+ __asm__ __volatile__(
+ "1: llock %[val], [%[rwlock]] \n"
+ " add %[val], %[val], 1 \n"
+ " scond %[val], [%[rwlock]] \n"
+ " bnz 1b \n"
+ " \n"
+ : [val] "=&r" (val)
+ : [rwlock] "r" (&(rw->counter))
+ : "memory", "cc");
+
+ smp_mb();
+}
+
+static inline void arch_write_unlock(arch_rwlock_t *rw)
+{
+ unsigned int val;
+
+ smp_mb();
+
+ /*
+ * rw->counter = __ARCH_RW_LOCK_UNLOCKED__;
+ */
+ __asm__ __volatile__(
+ "1: llock %[val], [%[rwlock]] \n"
+ " scond %[UNLOCKED], [%[rwlock]]\n"
+ " bnz 1b \n"
+ " \n"
+ : [val] "=&r" (val)
+ : [rwlock] "r" (&(rw->counter)),
+ [UNLOCKED] "r" (__ARCH_RW_LOCK_UNLOCKED__)
+ : "memory", "cc");
+
+ smp_mb();
+}
+
+#undef SCOND_FAIL_RETRY_VAR_DEF
+#undef SCOND_FAIL_RETRY_ASM
+#undef SCOND_FAIL_RETRY_VARS
+
+#endif /* CONFIG_ARC_STAR_9000923308 */
+
#else /* !CONFIG_ARC_HAS_LLSC */

static inline void arch_spin_lock(arch_spinlock_t *lock)
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index f2f771bd3ede..cabde9dc0696 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -336,6 +336,10 @@ static void arc_chk_core_config(void)
pr_warn("CONFIG_ARC_FPU_SAVE_RESTORE needed for working apps\n");
else if (!cpu->extn.fpu_dp && fpu_enabled)
panic("FPU non-existent, disable CONFIG_ARC_FPU_SAVE_RESTORE\n");
+
+ if (is_isa_arcv2() && IS_ENABLED(CONFIG_SMP) && cpu->isa.atomic &&
+ !IS_ENABLED(CONFIG_ARC_STAR_9000923308))
+ panic("llock/scond livelock workaround missing\n");
}

/*
--
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/