RE: [PATCH v3 2/2] RISC-V: separate Zbb optimisations requiring and not requiring toolchain support
From: Wang, Xiao W
Date: Tue May 28 2024 - 21:01:39 EST
> -----Original Message-----
> From: Conor Dooley <conor@xxxxxxxxxx>
> Sent: Tuesday, May 28, 2024 7:11 PM
> To: linux-riscv@xxxxxxxxxxxxxxxxxxx
> Cc: conor@xxxxxxxxxx; Conor Dooley <conor.dooley@xxxxxxxxxxxxx>; Wang,
> Xiao W <xiao.w.wang@xxxxxxxxx>; Andrew Jones
> <ajones@xxxxxxxxxxxxxxxx>; pulehui@xxxxxxxxxx; Charlie Jenkins
> <charlie@xxxxxxxxxxxx>; Paul Walmsley <paul.walmsley@xxxxxxxxxx>; Palmer
> Dabbelt <palmer@xxxxxxxxxxx>; linux-kernel@xxxxxxxxxxxxxxx; Samuel
> Holland <samuel.holland@xxxxxxxxxx>; Pu Lehui
> <pulehui@xxxxxxxxxxxxxxx>; Björn Töpel <bjorn@xxxxxxxxxx>
> Subject: [PATCH v3 2/2] RISC-V: separate Zbb optimisations requiring and not
> requiring toolchain support
>
> From: Conor Dooley <conor.dooley@xxxxxxxxxxxxx>
>
> It seems a bit ridiculous to require toolchain support for BPF to
> assemble Zbb instructions, so move the dependency on toolchain support
> for Zbb optimisations out of the Kconfig option and to the callsites.
>
> Zbb support has always depended on alternatives, so while adjusting the
> config options guarding optimisations, remove any checks for
> whether or not alternatives are enabled.
>
> Signed-off-by: Conor Dooley <conor.dooley@xxxxxxxxxxxxx>
> ---
> v2/v3:
> - Per Drew's suggestion, drop the stub Kconfig option and instead push
> out the toolchain dependency to the relevant callsites.
> - Delete a bunch of comments about only attempting Zbb if alternatives
> are available, since they always are.
> ---
> arch/riscv/Kconfig | 4 ++--
> arch/riscv/include/asm/arch_hweight.h | 6 +++---
> arch/riscv/include/asm/bitops.h | 4 ++--
> arch/riscv/include/asm/checksum.h | 3 +--
> arch/riscv/lib/csum.c | 21 +++------------------
> arch/riscv/lib/strcmp.S | 5 +++--
> arch/riscv/lib/strlen.S | 5 +++--
> arch/riscv/lib/strncmp.S | 5 +++--
> 8 files changed, 20 insertions(+), 33 deletions(-)
>
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index 3b702e6cc051..a91c53b096e8 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -613,12 +613,12 @@ config TOOLCHAIN_HAS_VECTOR_CRYPTO
>
> config RISCV_ISA_ZBB
> bool "Zbb extension support for bit manipulation instructions"
> - depends on TOOLCHAIN_HAS_ZBB
> depends on RISCV_ALTERNATIVE
> default y
> help
> Add support for enabling optimisations in the kernel when the
> - Zbb extension is detected at boot.
> + Zbb extension is detected at boot. Some optimisations may
> + additionally depend on toolchain support for Zbb.
>
> The Zbb extension provides instructions to accelerate a number
> of bit-specific operations (count bit population, sign extending,
> diff --git a/arch/riscv/include/asm/arch_hweight.h
> b/arch/riscv/include/asm/arch_hweight.h
> index 85b2c443823e..b94db541901a 100644
> --- a/arch/riscv/include/asm/arch_hweight.h
> +++ b/arch/riscv/include/asm/arch_hweight.h
> @@ -19,7 +19,7 @@
>
> static __always_inline unsigned int __arch_hweight32(unsigned int w)
> {
> -#ifdef CONFIG_RISCV_ISA_ZBB
> +#if defined(CONFIG_RISCV_ISA_ZBB) &&
> defined(CONFIG_TOOLCHAIN_HAS_ZBB)
> asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
> RISCV_ISA_EXT_ZBB, 1)
> : : : : legacy);
> @@ -50,7 +50,7 @@ static inline unsigned int __arch_hweight8(unsigned int
> w)
> #if BITS_PER_LONG == 64
> static __always_inline unsigned long __arch_hweight64(__u64 w)
> {
> -# ifdef CONFIG_RISCV_ISA_ZBB
> +#if defined(CONFIG_RISCV_ISA_ZBB) &&
> defined(CONFIG_TOOLCHAIN_HAS_ZBB)
> asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
> RISCV_ISA_EXT_ZBB, 1)
> : : : : legacy);
> @@ -64,7 +64,7 @@ static __always_inline unsigned long
> __arch_hweight64(__u64 w)
> return w;
>
> legacy:
> -# endif
> +#endif
> return __sw_hweight64(w);
> }
> #else /* BITS_PER_LONG == 64 */
> diff --git a/arch/riscv/include/asm/bitops.h
> b/arch/riscv/include/asm/bitops.h
> index 880606b0469a..6966d00c3a8a 100644
> --- a/arch/riscv/include/asm/bitops.h
> +++ b/arch/riscv/include/asm/bitops.h
> @@ -15,7 +15,7 @@
> #include <asm/barrier.h>
> #include <asm/bitsperlong.h>
>
> -#if !defined(CONFIG_RISCV_ISA_ZBB) || defined(NO_ALTERNATIVE)
> +#if !(defined(CONFIG_RISCV_ISA_ZBB) &&
> defined(CONFIG_TOOLCHAIN_HAS_ZBB)) || defined(NO_ALTERNATIVE)
> #include <asm-generic/bitops/__ffs.h>
> #include <asm-generic/bitops/__fls.h>
> #include <asm-generic/bitops/ffs.h>
> @@ -175,7 +175,7 @@ static __always_inline int variable_fls(unsigned int x)
> variable_fls(x_); \
> })
>
> -#endif /* !defined(CONFIG_RISCV_ISA_ZBB) || defined(NO_ALTERNATIVE)
> */
> +#endif /* !(defined(CONFIG_RISCV_ISA_ZBB) &&
> defined(CONFIG_TOOLCHAIN_HAS_ZBB)) || defined(NO_ALTERNATIVE) */
>
> #include <asm-generic/bitops/ffz.h>
> #include <asm-generic/bitops/fls64.h>
> diff --git a/arch/riscv/include/asm/checksum.h
> b/arch/riscv/include/asm/checksum.h
> index 88e6f1499e88..da378856f1d5 100644
> --- a/arch/riscv/include/asm/checksum.h
> +++ b/arch/riscv/include/asm/checksum.h
> @@ -49,8 +49,7 @@ static inline __sum16 ip_fast_csum(const void *iph,
> unsigned int ihl)
> * ZBB only saves three instructions on 32-bit and five on 64-bit so not
> * worth checking if supported without Alternatives.
> */
> - if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
> - IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
> + if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
> IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB)) {
> unsigned long fold_temp;
>
> asm goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
> diff --git a/arch/riscv/lib/csum.c b/arch/riscv/lib/csum.c
> index 7fb12c59e571..9408f50ca59a 100644
> --- a/arch/riscv/lib/csum.c
> +++ b/arch/riscv/lib/csum.c
> @@ -40,12 +40,7 @@ __sum16 csum_ipv6_magic(const struct in6_addr
> *saddr,
> uproto = (__force unsigned int)htonl(proto);
> sum += uproto;
>
> - /*
> - * Zbb support saves 4 instructions, so not worth checking without
> - * alternatives if supported
> - */
> - if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
> - IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
> + if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
> IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB)) {
> unsigned long fold_temp;
>
> /*
> @@ -157,12 +152,7 @@ do_csum_with_alignment(const unsigned char
> *buff, int len)
> csum = do_csum_common(ptr, end, data);
>
> #ifdef CC_HAS_ASM_GOTO_TIED_OUTPUT
> - /*
> - * Zbb support saves 6 instructions, so not worth checking without
> - * alternatives if supported
> - */
> - if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
> - IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
> + if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
> IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB)) {
> unsigned long fold_temp;
>
> /*
> @@ -244,12 +234,7 @@ do_csum_no_alignment(const unsigned char *buff,
> int len)
> end = (const unsigned long *)(buff + len);
> csum = do_csum_common(ptr, end, data);
>
> - /*
> - * Zbb support saves 6 instructions, so not worth checking without
> - * alternatives if supported
> - */
> - if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
> - IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
> + if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
> IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB)) {
> unsigned long fold_temp;
>
> /*
> diff --git a/arch/riscv/lib/strcmp.S b/arch/riscv/lib/strcmp.S
> index 687b2bea5c43..204fb1c184f3 100644
> --- a/arch/riscv/lib/strcmp.S
> +++ b/arch/riscv/lib/strcmp.S
> @@ -8,7 +8,8 @@
> /* int strcmp(const char *cs, const char *ct) */
> SYM_FUNC_START(strcmp)
>
> - ALTERNATIVE("nop", "j strcmp_zbb", 0, RISCV_ISA_EXT_ZBB,
> CONFIG_RISCV_ISA_ZBB)
> + __ALTERNATIVE_CFG("nop", "j strcmp_zbb", 0, RISCV_ISA_EXT_ZBB,
> + IS_ENABLED(CONFIG_RISCV_ISA_ZBB_ALT) &&
> IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB))
s/CONFIG_RISCV_ISA_ZBB_ALT/CONFIG_RISCV_ISA_ZBB/
The same issue applies to the corresponding changes in strlen.S and strncmp.S below.
BRs,
Xiao
>
> /*
> * Returns
> @@ -43,7 +44,7 @@ SYM_FUNC_START(strcmp)
> * The code was published as part of the bitmanip manual
> * in Appendix A.
> */
> -#ifdef CONFIG_RISCV_ISA_ZBB
> +#if defined(CONFIG_RISCV_ISA_ZBB) &&
> defined(CONFIG_TOOLCHAIN_HAS_ZBB)
> strcmp_zbb:
>
> .option push
> diff --git a/arch/riscv/lib/strlen.S b/arch/riscv/lib/strlen.S
> index 8ae3064e45ff..84909807d988 100644
> --- a/arch/riscv/lib/strlen.S
> +++ b/arch/riscv/lib/strlen.S
> @@ -8,7 +8,8 @@
> /* int strlen(const char *s) */
> SYM_FUNC_START(strlen)
>
> - ALTERNATIVE("nop", "j strlen_zbb", 0, RISCV_ISA_EXT_ZBB,
> CONFIG_RISCV_ISA_ZBB)
> + __ALTERNATIVE_CFG("nop", "j strlen_zbb", 0, RISCV_ISA_EXT_ZBB,
> + IS_ENABLED(CONFIG_RISCV_ISA_ZBB_ALT) &&
> IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB))
>
> /*
> * Returns
> @@ -33,7 +34,7 @@ SYM_FUNC_START(strlen)
> /*
> * Variant of strlen using the ZBB extension if available
> */
> -#ifdef CONFIG_RISCV_ISA_ZBB
> +#if defined(CONFIG_RISCV_ISA_ZBB) &&
> defined(CONFIG_TOOLCHAIN_HAS_ZBB)
> strlen_zbb:
>
> #ifdef CONFIG_CPU_BIG_ENDIAN
> diff --git a/arch/riscv/lib/strncmp.S b/arch/riscv/lib/strncmp.S
> index aba5b3148621..87e7c83c1672 100644
> --- a/arch/riscv/lib/strncmp.S
> +++ b/arch/riscv/lib/strncmp.S
> @@ -8,7 +8,8 @@
> /* int strncmp(const char *cs, const char *ct, size_t count) */
> SYM_FUNC_START(strncmp)
>
> - ALTERNATIVE("nop", "j strncmp_zbb", 0, RISCV_ISA_EXT_ZBB,
> CONFIG_RISCV_ISA_ZBB)
> + __ALTERNATIVE_CFG("nop", "j strncmp_zbb", 0, RISCV_ISA_EXT_ZBB,
> + IS_ENABLED(CONFIG_RISCV_ISA_ZBB_ALT) &&
> IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB))
>
> /*
> * Returns
> @@ -46,7 +47,7 @@ SYM_FUNC_START(strncmp)
> /*
> * Variant of strncmp using the ZBB extension if available
> */
> -#ifdef CONFIG_RISCV_ISA_ZBB
> +#if defined(CONFIG_RISCV_ISA_ZBB) &&
> defined(CONFIG_TOOLCHAIN_HAS_ZBB)
> strncmp_zbb:
>
> .option push
> --
> 2.43.0