Re: [RFC PATCH 02/13] powerpc: Add initial Dynamic Execution Control Register (DEXCR) support
From: Nicholas Piggin
Date: Mon Mar 06 2023 - 23:46:58 EST
On Mon Nov 28, 2022 at 12:44 PM AEST, Benjamin Gray wrote:
> ISA 3.1B introduces the Dynamic Execution Control Register (DEXCR). It
> is a per-cpu register that allows control over various CPU behaviours
> including branch hint usage, indirect branch speculation, and
> hashst/hashchk support.
>
> Though introduced in 3.1B, no CPUs using 3.1 were released, so
> CPU_FTR_ARCH_31 is used to determine support for the register itself.
> Support for each DEXCR bit (aspect) is reported separately by the
> firmware.
>
> Add various definitions and basic support for the DEXCR in the kernel.
> Right now it just initialises and maintains the DEXCR on process
> creation/swap, and clears it in reset_sprs().
>
A couple of comments below, but it looks good:
Reviewed-by: Nicholas Piggin <npiggin@xxxxxxxxx>
> Signed-off-by: Benjamin Gray <bgray@xxxxxxxxxxxxx>
> ---
> arch/powerpc/include/asm/book3s/64/kexec.h | 3 +++
> arch/powerpc/include/asm/cputable.h | 8 ++++++-
> arch/powerpc/include/asm/processor.h | 13 +++++++++++
> arch/powerpc/include/asm/reg.h | 6 ++++++
> arch/powerpc/kernel/Makefile | 1 +
> arch/powerpc/kernel/dexcr.c | 25 ++++++++++++++++++++++
> arch/powerpc/kernel/dt_cpu_ftrs.c | 4 ++++
> arch/powerpc/kernel/process.c | 13 ++++++++++-
> arch/powerpc/kernel/prom.c | 4 ++++
> 9 files changed, 75 insertions(+), 2 deletions(-)
> create mode 100644 arch/powerpc/kernel/dexcr.c
>
> diff --git a/arch/powerpc/include/asm/book3s/64/kexec.h b/arch/powerpc/include/asm/book3s/64/kexec.h
> index d4b9d476ecba..563baf94a962 100644
> --- a/arch/powerpc/include/asm/book3s/64/kexec.h
> +++ b/arch/powerpc/include/asm/book3s/64/kexec.h
> @@ -21,6 +21,9 @@ static inline void reset_sprs(void)
> plpar_set_ciabr(0);
> }
>
> + if (cpu_has_feature(CPU_FTR_ARCH_31))
> + mtspr(SPRN_DEXCR, 0);
> +
> /* Do we need isync()? We are going via a kexec reset */
> isync();
> }
> diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
> index 757dbded11dc..03bc192f2d8b 100644
> --- a/arch/powerpc/include/asm/cputable.h
> +++ b/arch/powerpc/include/asm/cputable.h
> @@ -192,6 +192,10 @@ static inline void cpu_feature_keys_init(void) { }
> #define CPU_FTR_P9_RADIX_PREFETCH_BUG LONG_ASM_CONST(0x0002000000000000)
> #define CPU_FTR_ARCH_31 LONG_ASM_CONST(0x0004000000000000)
> #define CPU_FTR_DAWR1 LONG_ASM_CONST(0x0008000000000000)
> +#define CPU_FTR_DEXCR_SBHE LONG_ASM_CONST(0x0010000000000000)
> +#define CPU_FTR_DEXCR_IBRTPD LONG_ASM_CONST(0x0020000000000000)
> +#define CPU_FTR_DEXCR_SRAPD LONG_ASM_CONST(0x0040000000000000)
> +#define CPU_FTR_DEXCR_NPHIE LONG_ASM_CONST(0x0080000000000000)
We potentially don't need to use CPU_FTR bits for each of these. We
only really want them to use instruction patching and make feature
tests fast. But we have been a bit liberal with using them and they
are kind of tied into cpu feature parsing code so maybe it's easier
to go with them for now.
>
> #ifndef __ASSEMBLY__
>
> @@ -451,7 +455,9 @@ static inline void cpu_feature_keys_init(void) { }
> CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
> CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
> CPU_FTR_ARCH_300 | CPU_FTR_ARCH_31 | \
> - CPU_FTR_DAWR | CPU_FTR_DAWR1)
> + CPU_FTR_DAWR | CPU_FTR_DAWR1 | \
> + CPU_FTR_DEXCR_SBHE | CPU_FTR_DEXCR_IBRTPD | CPU_FTR_DEXCR_SRAPD | \
> + CPU_FTR_DEXCR_NPHIE)
> #define CPU_FTRS_CELL (CPU_FTR_LWSYNC | \
> CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
> CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
> diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
> index 631802999d59..0a8a793b8b8b 100644
> --- a/arch/powerpc/include/asm/processor.h
> +++ b/arch/powerpc/include/asm/processor.h
> @@ -446,6 +446,19 @@ int exit_vmx_usercopy(void);
> int enter_vmx_ops(void);
> void *exit_vmx_ops(void *dest);
>
> +#ifdef CONFIG_PPC_BOOK3S_64
> +
> +unsigned long get_thread_dexcr(struct thread_struct const *t);
> +
> +#else
> +
> +static inline unsigned long get_thread_dexcr(struct thread_struct const *t)
> +{
> + return 0;
> +}
> +
> +#endif /* CONFIG_PPC_BOOK3S_64 */
> +
> #endif /* __KERNEL__ */
> #endif /* __ASSEMBLY__ */
> #endif /* _ASM_POWERPC_PROCESSOR_H */
> diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
> index 1e8b2e04e626..cdd1f174c399 100644
> --- a/arch/powerpc/include/asm/reg.h
> +++ b/arch/powerpc/include/asm/reg.h
> @@ -385,6 +385,12 @@
> #define SPRN_HSRR0 0x13A /* Hypervisor Save/Restore 0 */
> #define SPRN_HSRR1 0x13B /* Hypervisor Save/Restore 1 */
> #define SPRN_ASDR 0x330 /* Access segment descriptor register */
> +#define SPRN_DEXCR 0x33C /* Dynamic execution control register */
> +#define DEXCR_PRO_MASK(aspect) __MASK(63 - (32 + (aspect))) /* Aspect number to problem state aspect mask */
I think PR is a better shorthand for problem state than PRO. It's just
more commonly used.
We also have PPC_BIT and PPC_BITMASK, _BIT being used for single-bit
mask. So this could be -
#define DEXCR_PR_BIT(aspect) PPC_BIT(32 + (aspect))
Or maybe DEXCR_PR_ASPECT_BIT.
> +#define DEXCR_PRO_SBHE DEXCR_PRO_MASK(0) /* Speculative Branch Hint Enable */
> +#define DEXCR_PRO_IBRTPD DEXCR_PRO_MASK(3) /* Indirect Branch Recurrent Target Prediction Disable */
> +#define DEXCR_PRO_SRAPD DEXCR_PRO_MASK(4) /* Subroutine Return Address Prediction Disable */
> +#define DEXCR_PRO_NPHIE DEXCR_PRO_MASK(5) /* Non-Privileged Hash Instruction Enable */
> #define SPRN_IC 0x350 /* Virtual Instruction Count */
> #define SPRN_VTB 0x351 /* Virtual Time Base */
> #define SPRN_LDBAR 0x352 /* LD Base Address Register */
> diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
> index 9b6146056e48..b112315cfdc2 100644
> --- a/arch/powerpc/kernel/Makefile
> +++ b/arch/powerpc/kernel/Makefile
> @@ -79,6 +79,7 @@ obj-$(CONFIG_VDSO32) += vdso32_wrapper.o
> obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o
> obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
> obj-$(CONFIG_PPC_DAWR) += dawr.o
> +obj-$(CONFIG_PPC_BOOK3S_64) += dexcr.o
> obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o
> obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o
> obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o
> diff --git a/arch/powerpc/kernel/dexcr.c b/arch/powerpc/kernel/dexcr.c
> new file mode 100644
> index 000000000000..32a0a69ff638
> --- /dev/null
> +++ b/arch/powerpc/kernel/dexcr.c
> @@ -0,0 +1,25 @@
> +#include <linux/cache.h>
> +#include <linux/init.h>
> +
> +#include <asm/cpu_has_feature.h>
> +#include <asm/cputable.h>
> +#include <asm/processor.h>
> +#include <asm/reg.h>
> +
> +#define DEFAULT_DEXCR 0
> +
> +static int __init dexcr_init(void)
> +{
> + if (!early_cpu_has_feature(CPU_FTR_ARCH_31))
> + return 0;
> +
> + mtspr(SPRN_DEXCR, DEFAULT_DEXCR);
> +
> + return 0;
> +}
> +early_initcall(dexcr_init);
> +
> +unsigned long get_thread_dexcr(struct thread_struct const *t)
> +{
> + return DEFAULT_DEXCR;
> +}
> diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
> index c3fb9fdf5bd7..896a48211a37 100644
> --- a/arch/powerpc/kernel/dt_cpu_ftrs.c
> +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
> @@ -661,6 +661,10 @@ static struct dt_cpu_feature_match __initdata
> {"prefix-instructions", feat_enable, 0},
> {"matrix-multiply-assist", feat_enable_mma, 0},
> {"debug-facilities-v31", feat_enable, CPU_FTR_DAWR1},
> + {"dexcr-speculative-branch-hint-enable", feat_enable, CPU_FTR_DEXCR_SBHE},
> + {"dexcr-indirect-branch-recurrent-target-prediction-disable", feat_enable, CPU_FTR_DEXCR_IBRTPD},
> + {"dexcr-subroutine-return-address-prediction-disable", feat_enable, CPU_FTR_DEXCR_SRAPD},
> + {"dexcr-non-privileged-hash-instruction-enable", feat_enable, CPU_FTR_DEXCR_NPHIE},
> };
>
> static bool __initdata using_dt_cpu_ftrs;
> diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
> index 67da147fe34d..17d26f652b80 100644
> --- a/arch/powerpc/kernel/process.c
> +++ b/arch/powerpc/kernel/process.c
> @@ -1228,6 +1228,13 @@ static inline void restore_sprs(struct thread_struct *old_thread,
> if (cpu_has_feature(CPU_FTR_P9_TIDR) &&
> old_thread->tidr != new_thread->tidr)
> mtspr(SPRN_TIDR, new_thread->tidr);
> +
> + if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> + unsigned long new_dexcr = get_thread_dexcr(new_thread);
> +
> + if (new_dexcr != get_thread_dexcr(old_thread))
> + mtspr(SPRN_DEXCR, new_dexcr);
> + }
> #endif
>
> }
> @@ -1802,7 +1809,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
>
> setup_ksp_vsid(p, sp);
>
> -#ifdef CONFIG_PPC64
> +#ifdef CONFIG_PPC64
> if (cpu_has_feature(CPU_FTR_DSCR)) {
> p->thread.dscr_inherit = current->thread.dscr_inherit;
> p->thread.dscr = mfspr(SPRN_DSCR);
> @@ -1939,6 +1946,10 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
> current->thread.tm_tfiar = 0;
> current->thread.load_tm = 0;
> #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
> +#ifdef CONFIG_PPC_BOOK3S_64
> + if (cpu_has_feature(CPU_FTR_ARCH_31))
> + mtspr(SPRN_DEXCR, get_thread_dexcr(¤t->thread));
> +#endif /* CONFIG_PPC_BOOK3S_64 */
You possibly don't need the ifdef here because CPU_FTR_ARCH_31 should
fold away. Some of the others do because they're using open-coded
access to struct members, but if you're using accessor functions to
get and set such things, there may be no need to.
I think my preference is for your style.
Thanks,
Nick
> }
> EXPORT_SYMBOL(start_thread);
>
> diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
> index 1eed87d954ba..eff250e1ae9a 100644
> --- a/arch/powerpc/kernel/prom.c
> +++ b/arch/powerpc/kernel/prom.c
> @@ -180,6 +180,10 @@ static struct ibm_feature ibm_pa_features[] __initdata = {
> .cpu_user_ftrs2 = PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_HTM_NOSC_COMP },
>
> { .pabyte = 64, .pabit = 0, .cpu_features = CPU_FTR_DAWR1 },
> + { .pabyte = 68, .pabit = 0, .cpu_features = CPU_FTR_DEXCR_SBHE },
> + { .pabyte = 68, .pabit = 3, .cpu_features = CPU_FTR_DEXCR_IBRTPD },
> + { .pabyte = 68, .pabit = 4, .cpu_features = CPU_FTR_DEXCR_SRAPD },
> + { .pabyte = 68, .pabit = 5, .cpu_features = CPU_FTR_DEXCR_NPHIE },
> };
>
> /*
> --
> 2.38.1