Re: [PATCH v3 2/2] riscv: Add Zawrs support for spinlocks

From: Andrew Jones
Date: Wed May 24 2023 - 13:06:01 EST


On Sun, May 21, 2023 at 01:47:15PM +0200, Heiko Stuebner wrote:
> From: Christoph Müllner <christoph.muellner@xxxxxxxx>
>
> The current RISC-V code uses the generic ticket lock implementation,
> that calls the macros smp_cond_load_relaxed() and smp_cond_load_acquire().
> Currently, RISC-V uses the generic implementation of these macros.
> This patch introduces a RISC-V specific implementation, of these
> macros, that peels off the first loop iteration

I guess this peeling off of the first iteration is because it's expected
that the load generated by READ_ONCE() is more efficient than lr.w/d? If
we're worried about unnecessary use of lr.w/d, then shouldn't we look
for a solution that doesn't issue those instructions when we don't have
the Zawrs extension?

> and modifies the waiting
> loop such, that it is possible to use the WRS.STO instruction of the Zawrs
> ISA extension to stall the CPU.
>
> The resulting implementation of smp_cond_load_*() will only work for
> 32-bit or 64-bit types for RV64 and 32-bit types for RV32.
> This is caused by the restrictions of the LR instruction (RISC-V only
> has LR.W and LR.D). Compiler assertions guard this new restriction.
>
> This patch uses the existing RISC-V ISA extension framework
> to detect the presence of Zawrs at run-time.
> If available a NOP instruction will be replaced by WRS.NTO or WRS.STO.
>
> The whole mechanism is gated by Kconfig setting, which defaults to Y.
>
> The Zawrs specification can be found here:
> https://github.com/riscv/riscv-zawrs/blob/main/zawrs.adoc
>
> Signed-off-by: Christoph Müllner <christoph.muellner@xxxxxxxx>
> [rebase, update to review comments]
> Signed-off-by: Heiko Stuebner <heiko.stuebner@xxxxxxxx>
> ---
> arch/riscv/Kconfig | 10 +++++
> arch/riscv/include/asm/barrier.h | 64 ++++++++++++++++++++++++++++
> arch/riscv/include/asm/errata_list.h | 14 ++++++
> arch/riscv/include/asm/hwcap.h | 1 +
> arch/riscv/kernel/cpu.c | 1 +
> arch/riscv/kernel/cpufeature.c | 1 +
> 6 files changed, 91 insertions(+)
>
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index 348c0fa1fc8c..60ff303ff58a 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -460,6 +460,16 @@ config RISCV_ISA_SVPBMT
>
> If you don't know what to do here, say Y.
>
> +config RISCV_ISA_ZAWRS
> + bool "Zawrs extension support"

This should be "Zawrs extension support for ..."

> + depends on RISCV_ALTERNATIVE
> + default y
> + help
> + Adds support to dynamically detect the presence of the Zawrs extension
> + (wait for reservation set) and enable its usage.
> +
> + If you don't know what to do here, say Y.
> +
> config TOOLCHAIN_HAS_ZBB
> bool
> default y
> diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h
> index 110752594228..bd64cfe5ae10 100644
> --- a/arch/riscv/include/asm/barrier.h
> +++ b/arch/riscv/include/asm/barrier.h
> @@ -12,6 +12,8 @@
>
> #ifndef __ASSEMBLY__
>
> +#include <asm/errata_list.h>
> +
> #define nop() __asm__ __volatile__ ("nop")
> #define __nops(n) ".rept " #n "\nnop\n.endr\n"
> #define nops(n) __asm__ __volatile__ (__nops(n))
> @@ -44,6 +46,36 @@ do { \
> ___p1; \
> })
>
> +#define ___smp_load_reservedN(pfx, ptr) \
> +({ \
> + typeof(*ptr) ___p1; \
> + __asm__ __volatile__ ("lr." pfx " %[p], %[c]\n" \
> + : [p]"=&r" (___p1), [c]"+A"(*ptr)); \
> + ___p1; \
> +})
> +
> +#define __smp_load_reserved_relaxed(ptr) \
> +({ \
> + typeof(*ptr) ___p1; \
> + if (sizeof(*ptr) == sizeof(int)) \
> + ___p1 = ___smp_load_reservedN("w", ptr); \
> + else if (sizeof(*ptr) == sizeof(long)) \
> + ___p1 = ___smp_load_reservedN("d", ptr); \
> + else \
> + compiletime_assert(0, \
> + "Need type compatible with LR/SC instructions " \
> + "for " __stringify(ptr)); \
> + ___p1; \

It's more common to use a switch for these things, embedding the
lr.w/d asm in each case, something like the macros in
arch/riscv/include/asm/cmpxchg.h. Can we stick with that pattern?

> +})
> +
> +#define __smp_load_reserved_acquire(ptr) \
> +({ \
> + typeof(*ptr) ___p1; \
> + ___p1 = __smp_load_reserved_relaxed(ptr); \
> + RISCV_FENCE(r, rw); \
> + ___p1; \
> +})
> +
> /*
> * This is a very specific barrier: it's currently only used in two places in
> * the kernel, both in the scheduler. See include/linux/spinlock.h for the two
> @@ -71,6 +103,38 @@ do { \
> */
> #define smp_mb__after_spinlock() RISCV_FENCE(iorw,iorw)
>
> +#define smp_cond_load_relaxed(ptr, cond_expr) \
> +({ \
> + typeof(ptr) __PTR = (ptr); \
> + __unqual_scalar_typeof(*ptr) VAL; \
> + VAL = READ_ONCE(*__PTR); \
> + if (!cond_expr) { \
> + for (;;) { \
> + VAL = __smp_load_reserved_relaxed(__PTR); \
> + if (cond_expr) \
> + break; \
> + ALT_WRS_STO(); \
> + } \
> + } \
> + (typeof(*ptr))VAL; \
> +})
> +
> +#define smp_cond_load_acquire(ptr, cond_expr) \
> +({ \
> + typeof(ptr) __PTR = (ptr); \
> + __unqual_scalar_typeof(*ptr) VAL; \
> + VAL = smp_load_acquire(__PTR); \
> + if (!cond_expr) { \
> + for (;;) { \
> + VAL = __smp_load_reserved_acquire(__PTR); \
> + if (cond_expr) \
> + break; \
> + ALT_WRS_STO(); \
> + } \
> + } \
> + (typeof(*ptr))VAL; \
> +})
> +
> #include <asm-generic/barrier.h>
>
> #endif /* __ASSEMBLY__ */
> diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
> index fb1a810f3d8c..36a72a07d62c 100644
> --- a/arch/riscv/include/asm/errata_list.h
> +++ b/arch/riscv/include/asm/errata_list.h
> @@ -44,6 +44,20 @@ asm(ALTERNATIVE("sfence.vma %0", "sfence.vma", SIFIVE_VENDOR_ID, \
> ERRATA_SIFIVE_CIP_1200, CONFIG_ERRATA_SIFIVE_CIP_1200) \
> : : "r" (addr) : "memory")
>
> +#define ZAWRS_WRS_NTO ".long 0x00d00073"
> +#define ALT_WRS_NTO() \
> +asm volatile(ALTERNATIVE( \
> + "nop\n\t", \
> + ZAWRS_WRS_NTO "\n\t", \
> + 0, RISCV_ISA_EXT_ZAWRS, CONFIG_RISCV_ISA_ZAWRS))

We don't use this. Do we need to define it now?

> +
> +#define ZAWRS_WRS_STO ".long 0x01d00073"

I'd prefer we use insn-def.h to define instructions, rather than scatter
.long's around, but since this instruction doesn't have any inputs, then
I guess it's not so important to use insn-def.h.

> +#define ALT_WRS_STO() \
> +asm volatile(ALTERNATIVE( \
> + "nop\n\t", \
> + ZAWRS_WRS_STO "\n\t", \
> + 0, RISCV_ISA_EXT_ZAWRS, CONFIG_RISCV_ISA_ZAWRS))
> +

These alternatives aren't supporting errata, so putting them in
errata_list.h doesn't seem appropriate. I think cpufeature.h
would be better.

> /*
> * _val is marked as "will be overwritten", so need to set it to 0
> * in the default case.
> diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
> index e0c40a4c63d5..4233d87539ab 100644
> --- a/arch/riscv/include/asm/hwcap.h
> +++ b/arch/riscv/include/asm/hwcap.h
> @@ -46,6 +46,7 @@
> #define RISCV_ISA_EXT_ZICBOZ 34
> #define RISCV_ISA_EXT_SMAIA 35
> #define RISCV_ISA_EXT_SSAIA 36
> +#define RISCV_ISA_EXT_ZAWRS 37
>
> #define RISCV_ISA_EXT_MAX 64
> #define RISCV_ISA_EXT_NAME_LEN_MAX 32
> diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
> index c96aa56cf1c7..ac9c604d8936 100644
> --- a/arch/riscv/kernel/cpu.c
> +++ b/arch/riscv/kernel/cpu.c
> @@ -184,6 +184,7 @@ static struct riscv_isa_ext_data isa_ext_arr[] = {
> __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),
> __RISCV_ISA_EXT_DATA(zicboz, RISCV_ISA_EXT_ZICBOZ),
> __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
> + __RISCV_ISA_EXT_DATA(zawrs, RISCV_ISA_EXT_ZAWRS),
> __RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB),
> __RISCV_ISA_EXT_DATA(smaia, RISCV_ISA_EXT_SMAIA),
> __RISCV_ISA_EXT_DATA(ssaia, RISCV_ISA_EXT_SSAIA),
> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> index b1d6b7e4b829..d4a22a7aed99 100644
> --- a/arch/riscv/kernel/cpufeature.c
> +++ b/arch/riscv/kernel/cpufeature.c
> @@ -236,6 +236,7 @@ void __init riscv_fill_hwcap(void)
> SET_ISA_EXT_MAP("svinval", RISCV_ISA_EXT_SVINVAL);
> SET_ISA_EXT_MAP("svnapot", RISCV_ISA_EXT_SVNAPOT);
> SET_ISA_EXT_MAP("svpbmt", RISCV_ISA_EXT_SVPBMT);
> + SET_ISA_EXT_MAP("zawrs", RISCV_ISA_EXT_ZAWRS);
> SET_ISA_EXT_MAP("zbb", RISCV_ISA_EXT_ZBB);
> SET_ISA_EXT_MAP("zicbom", RISCV_ISA_EXT_ZICBOM);
> SET_ISA_EXT_MAP("zicboz", RISCV_ISA_EXT_ZICBOZ);
> --
> 2.39.0
>

Thanks,
drew