Re: [PATCH 6/7] riscv: report misaligned accesses emulation to hwprobe

From: Evan Green
Date: Thu Sep 28 2023 - 12:52:39 EST


On Thu, Sep 28, 2023 at 12:46 AM Clément Léger <cleger@xxxxxxxxxxxx> wrote:
>
>
>
> On 26/09/2023 23:57, Evan Green wrote:
> > On Tue, Sep 26, 2023 at 8:03 AM Clément Léger <cleger@xxxxxxxxxxxx> wrote:
> >>
> >> hwprobe provides a way to report if misaligned access are emulated. In
> >> order to correctly populate that feature, we can check if it actually
> >> traps when doing a misaligned access. This can be checked using an
> >> exception table entry which will actually be used when a misaligned
> >> access is done from kernel mode.
> >>
> >> Signed-off-by: Clément Léger <cleger@xxxxxxxxxxxx>
> >> ---
> >> arch/riscv/include/asm/cpufeature.h | 6 +++
> >> arch/riscv/kernel/cpufeature.c | 6 ++-
> >> arch/riscv/kernel/setup.c | 1 +
> >> arch/riscv/kernel/traps_misaligned.c | 63 +++++++++++++++++++++++++++-
> >> 4 files changed, 74 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
> >> index d0345bd659c9..c1f0ef02cd7d 100644
> >> --- a/arch/riscv/include/asm/cpufeature.h
> >> +++ b/arch/riscv/include/asm/cpufeature.h
> >> @@ -8,6 +8,7 @@
> >>
> >> #include <linux/bitmap.h>
> >> #include <asm/hwcap.h>
> >> +#include <asm/hwprobe.h>
> >>
> >> /*
> >> * These are probed via a device_initcall(), via either the SBI or directly
> >> @@ -32,4 +33,9 @@ extern struct riscv_isainfo hart_isa[NR_CPUS];
> >>
> >> void check_unaligned_access(int cpu);
> >>
> >> +bool unaligned_ctl_available(void);
> >> +
> >> +bool check_unaligned_access_emulated(int cpu);
> >> +void unaligned_emulation_finish(void);
> >> +
> >> #endif
> >> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> >> index 1cfbba65d11a..fbbde800bc21 100644
> >> --- a/arch/riscv/kernel/cpufeature.c
> >> +++ b/arch/riscv/kernel/cpufeature.c
> >> @@ -568,6 +568,9 @@ void check_unaligned_access(int cpu)
> >> void *src;
> >> long speed = RISCV_HWPROBE_MISALIGNED_SLOW;
> >>
> >> + if (check_unaligned_access_emulated(cpu))
> >
> > This spot (referenced below).
> >
> >> + return;
> >> +
> >> page = alloc_pages(GFP_NOWAIT, get_order(MISALIGNED_BUFFER_SIZE));
> >> if (!page) {
> >> pr_warn("Can't alloc pages to measure memcpy performance");
> >> @@ -645,9 +648,10 @@ void check_unaligned_access(int cpu)
> >> __free_pages(page, get_order(MISALIGNED_BUFFER_SIZE));
> >> }
> >>
> >> -static int check_unaligned_access_boot_cpu(void)
> >> +static int __init check_unaligned_access_boot_cpu(void)
> >> {
> >> check_unaligned_access(0);
> >> + unaligned_emulation_finish();
> >> return 0;
> >> }
> >>
> >> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> >> index e600aab116a4..3af6ad4df7cf 100644
> >> --- a/arch/riscv/kernel/setup.c
> >> +++ b/arch/riscv/kernel/setup.c
> >> @@ -26,6 +26,7 @@
> >> #include <asm/acpi.h>
> >> #include <asm/alternative.h>
> >> #include <asm/cacheflush.h>
> >> +#include <asm/cpufeature.h>
> >> #include <asm/cpu_ops.h>
> >> #include <asm/early_ioremap.h>
> >> #include <asm/pgtable.h>
> >> diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
> >> index b5fb1ff078e3..fa81f6952fa4 100644
> >> --- a/arch/riscv/kernel/traps_misaligned.c
> >> +++ b/arch/riscv/kernel/traps_misaligned.c
> >> @@ -9,11 +9,14 @@
> >> #include <linux/perf_event.h>
> >> #include <linux/irq.h>
> >> #include <linux/stringify.h>
> >> +#include <linux/prctl.h>
> >>
> >> #include <asm/processor.h>
> >> #include <asm/ptrace.h>
> >> #include <asm/csr.h>
> >> #include <asm/entry-common.h>
> >> +#include <asm/hwprobe.h>
> >> +#include <asm/cpufeature.h>
> >>
> >> #define INSN_MATCH_LB 0x3
> >> #define INSN_MASK_LB 0x707f
> >> @@ -396,8 +399,10 @@ union reg_data {
> >> u64 data_u64;
> >> };
> >>
> >> +static bool unaligned_ctl __read_mostly;
> >> +
> >> /* sysctl hooks */
> >> -int unaligned_enabled __read_mostly = 1; /* Enabled by default */
> >> +int unaligned_enabled __read_mostly;
> >>
> >> int handle_misaligned_load(struct pt_regs *regs)
> >> {
> >> @@ -412,6 +417,9 @@ int handle_misaligned_load(struct pt_regs *regs)
> >> if (!unaligned_enabled)
> >> return -1;
> >>
> >> + if (user_mode(regs) && (current->thread.align_ctl & PR_UNALIGN_SIGBUS))
> >> + return -1;
> >> +
> >> if (get_insn(regs, epc, &insn))
> >> return -1;
> >>
> >> @@ -511,6 +519,9 @@ int handle_misaligned_store(struct pt_regs *regs)
> >> if (!unaligned_enabled)
> >> return -1;
> >>
> >> + if (user_mode(regs) && (current->thread.align_ctl & PR_UNALIGN_SIGBUS))
> >> + return -1;
> >> +
> >> if (get_insn(regs, epc, &insn))
> >> return -1;
> >>
> >> @@ -585,3 +596,53 @@ int handle_misaligned_store(struct pt_regs *regs)
> >>
> >> return 0;
> >> }
> >> +
> >> +bool check_unaligned_access_emulated(int cpu)
> >> +{
> >> + unsigned long emulated = 1, tmp_var;
> >> +
> >> + /* Use a fixup to detect if misaligned access triggered an exception */
> >> + __asm__ __volatile__ (
> >> + "1:\n"
> >> + " "REG_L" %[tmp], 1(%[ptr])\n"
> >> + " li %[emulated], 0\n"
> >> + "2:\n"
> >> + _ASM_EXTABLE(1b, 2b)
> >> + : [emulated] "+r" (emulated), [tmp] "=r" (tmp_var)
> >> + : [ptr] "r" (&tmp_var)
> >> + : "memory");
> >> +
> >> + if (!emulated)
> >> + return false;
> >> +
> >> + per_cpu(misaligned_access_speed, cpu) =
> >> + RISCV_HWPROBE_MISALIGNED_EMULATED;
> >
> > For tidiness, can we move the assignment of this per-cpu variable into
> > check_unaligned_access(), at the spot I referenced above. That way
> > people looking to see how this variable is set don't have to hunt
> > through multiple locations.
>
> Agreed, that seems better.
>
> >
> >> +
> >> + return true;
> >> +}
> >> +
> >> +void __init unaligned_emulation_finish(void)
> >> +{
> >> + int cpu;
> >> +
> >> + /*
> >> + * We can only support PR_UNALIGN controls if all CPUs have misaligned
> >> + * accesses emulated since tasks requesting such control can run on any
> >> + * CPU.
> >> + */
> >> + for_each_possible_cpu(cpu) {
> >> + if (per_cpu(misaligned_access_speed, cpu) !=
> >> + RISCV_HWPROBE_MISALIGNED_EMULATED) {
> >> + goto out;
> >> + }
> >> + }
> >> + unaligned_ctl = true;
> >
> > This doesn't handle the case where a CPU is hotplugged later that
> > doesn't match with the others. You may want to add a patch that fails
> > the onlining of that new CPU if unaligned_ctl is true and
> > new_cpu.misaligned_access_speed != EMULATED.
>
> So actually, this will require a bit more plumbing as I realize the
> switch to disable misalignment support is global. This switch should
> only be disabled at boot which means I won't be able to disable it at
> runtime (while hiotplugging a CPU) for CPU detection. There are multiple
> ways to handle that:
>
> 1- Have a per-cpu switch for misalignment handling which would be
> disabled only when detection is needed.
>
> 2- Assume that once detected at boot-time, emulation will not change.
>
> Not sure which one is better though. Advice are welcomed.

If I gaze into my own crystal ball, my hope is that the Venn diagram
of "systems that support hotplug" and "systems that still use software
assist for misaligned access" is just two circles not touching. If
people agree with that, then the safe thing to do is enforce it, by
failing to online new hotplugged CPUs that don't conform to
misaligned_access_speed == EMULATED if unaligned_ctl is true. We would
sacrifice some future flexibility by making this choice now though, so
it requires buy-in for this particular crystal ball vision.

-Evan