[PATCH v3 3/4] riscv: cmpxchg.h: Implement xchg for short

From: guoren
Date: Thu Mar 25 2021 - 03:57:57 EST


From: Guo Ren <guoren@xxxxxxxxxxxxxxxxx>

riscv only supports lr.w/lr.d and sc.w/sc.d, i.e. word (double-word)
sized and aligned accesses. There are no lr.h/sc.h instructions. But
qspinlock.c needs xchg on a short type variable:

xchg_tail -> xchg_relaxed(&lock->tail, ...

typedef struct qspinlock {
	union {
		atomic_t val;

		/*
		 * By using the whole 2nd least significant byte for the
		 * pending bit, we can allow better optimization of the lock
		 * acquisition for the pending bit holder.
		 */
		struct {
			u8	locked;
			u8	pending;
		};
		struct {
			u16	locked_pending;
			u16	tail; /* half word */
		};
	};
} arch_spinlock_t;
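
For context, the caller in kernel/locking/qspinlock.c (in the
_Q_PENDING_BITS == 8 configuration) looks roughly like this; a
paraphrased sketch, not part of this patch:

static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
{
	/*
	 * Relaxed semantics are sufficient here; the caller orders the
	 * MCS node initialization against the tail update.
	 */
	return (u32)xchg_relaxed(&lock->tail,
				 tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
}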

So emulate the short-type xchg with a word-sized lr.w/sc.w sequence;
this only covers qspinlock's requirement.
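
The same masking idea, written as a plain-C sketch for illustration
only (the function name and the use of GCC's __atomic builtins in
place of lr.w/sc.w are mine; a naturally aligned, little-endian
containing word is assumed):

#include <stdint.h>

static uint16_t xchg16_emulated(uint16_t *p, uint16_t newval)
{
	uintptr_t align = (uintptr_t)p & 0x3;		/* 0 or 2 */
	uint32_t *wp = (uint32_t *)((uintptr_t)p & ~(uintptr_t)0x3);
	int shift = align * 8;				/* 0 or 16 */
	uint32_t mask = 0xffffU << shift;
	uint32_t old, new;

	old = __atomic_load_n(wp, __ATOMIC_RELAXED);
	do {
		/* Splice the new halfword into the containing word. */
		new = (old & ~mask) | ((uint32_t)newval << shift);
	} while (!__atomic_compare_exchange_n(wp, &old, new, 1,
					      __ATOMIC_RELAXED, __ATOMIC_RELAXED));

	/* Previous halfword value, as xchg must return. */
	return (uint16_t)(old >> shift);
}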

Michael has sent another implementation; see the Link below.

Signed-off-by: Guo Ren <guoren@xxxxxxxxxxxxxxxxx>
Co-developed-by: Michael Clark <michaeljclark@xxxxxxx>
Tested-by: Guo Ren <guoren@xxxxxxxxxxxxxxxxx>
Link: https://lore.kernel.org/linux-riscv/20190211043829.30096-2-michaeljclark@xxxxxxx/
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Anup Patel <anup@xxxxxxxxxxxxxx>
Cc: Arnd Bergmann <arnd@xxxxxxxx>
Cc: Palmer Dabbelt <palmerdabbelt@xxxxxxxxxx>
---
arch/riscv/include/asm/cmpxchg.h | 36 ++++++++++++++++++++++++++++++++
1 file changed, 36 insertions(+)

diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index 50513b95411d..5ca41152cf4b 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -22,7 +22,43 @@
 	__typeof__(ptr) __ptr = (ptr); \
 	__typeof__(new) __new = (new); \
 	__typeof__(*(ptr)) __ret; \
+	register unsigned long __rc, tmp, align, addr; \
 	switch (size) { \
+	case 2: \
+		align = ((unsigned long) __ptr & 0x3); \
+		addr = ((unsigned long) __ptr & ~0x3); \
+		if (align) { \
+			__asm__ __volatile__ ( \
+				"0: lr.w %0, (%4) \n" \
+				" mv %1, %0 \n" \
+				" slliw %1, %1, 16 \n" \
+				" srliw %1, %1, 16 \n" \
+				" mv %2, %3 \n" \
+				" slliw %2, %2, 16 \n" \
+				" or %1, %2, %1 \n" \
+				" sc.w %2, %1, (%4) \n" \
+				" bnez %2, 0b \n" \
+				" srliw %0, %0, 16 \n" \
+				: "=&r" (__ret), "=&r" (tmp), "=&r" (__rc) \
+				: "r" (__new), "r" (addr) \
+				: "memory"); \
+		} else { \
+			__asm__ __volatile__ ( \
+				"0: lr.w %0, (%4) \n" \
+				" mv %1, %0 \n" \
+				" srliw %1, %1, 16 \n" \
+				" slliw %1, %1, 16 \n" \
+				" mv %2, %3 \n" \
+				" or %1, %2, %1 \n" \
+				" sc.w %2, %1, (%4) \n" \
+				" bnez %2, 0b \n" \
+				" slliw %0, %0, 16 \n" \
+				" srliw %0, %0, 16 \n" \
+				: "=&r" (__ret), "=&r" (tmp), "=&r" (__rc) \
+				: "r" (__new), "r" (addr) \
+				: "memory"); \
+		} \
+		break; \
 	case 4: \
 		__asm__ __volatile__ ( \
 			" amoswap.w %0, %2, %1\n" \
--
2.17.1