Re: [PATCH v2 7/8] riscv: report misaligned accesses emulation to hwprobe

From: Clément Léger
Date: Mon Oct 09 2023 - 09:07:46 EST




On 04/10/2023 18:14, Evan Green wrote:
> On Wed, Oct 4, 2023 at 8:14 AM Clément Léger <cleger@xxxxxxxxxxxx> wrote:
>>
>> hwprobe provides a way to report if misaligned access are emulated. In
>> order to correctly populate that feature, we can check if it actually
>> traps when doing a misaligned access. This can be checked using an
>> exception table entry which will actually be used when a misaligned
>> access is done from kernel mode.
>>
>> Signed-off-by: Clément Léger <cleger@xxxxxxxxxxxx>
>> ---
>> arch/riscv/include/asm/cpufeature.h | 18 +++++++++
>> arch/riscv/kernel/cpufeature.c | 4 ++
>> arch/riscv/kernel/smpboot.c | 2 +-
>> arch/riscv/kernel/traps_misaligned.c | 56 ++++++++++++++++++++++++++++
>> 4 files changed, 79 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
>> index d0345bd659c9..e4ae6af51876 100644
>> --- a/arch/riscv/include/asm/cpufeature.h
>> +++ b/arch/riscv/include/asm/cpufeature.h
>> @@ -32,4 +32,22 @@ extern struct riscv_isainfo hart_isa[NR_CPUS];
>>
>> void check_unaligned_access(int cpu);
>>
>> +#ifdef CONFIG_RISCV_MISALIGNED
>> +bool unaligned_ctl_available(void);
>> +bool check_unaligned_access_emulated(int cpu);
>> +void unaligned_emulation_finish(void);
>> +#else
>> +static inline bool unaligned_ctl_available(void)
>> +{
>> + return false;
>> +}
>> +
>> +static inline bool check_unaligned_access_emulated(int cpu)
>> +{
>> + return false;
>> +}
>> +
>> +static inline void unaligned_emulation_finish(void) {}
>> +#endif
>> +
>> #endif
>> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
>> index 356e5677eeb1..fbbde800bc21 100644
>> --- a/arch/riscv/kernel/cpufeature.c
>> +++ b/arch/riscv/kernel/cpufeature.c
>> @@ -568,6 +568,9 @@ void check_unaligned_access(int cpu)
>> void *src;
>> long speed = RISCV_HWPROBE_MISALIGNED_SLOW;
>>
>> + if (check_unaligned_access_emulated(cpu))
>> + return;
>> +
>> page = alloc_pages(GFP_NOWAIT, get_order(MISALIGNED_BUFFER_SIZE));
>> if (!page) {
>> pr_warn("Can't alloc pages to measure memcpy performance");
>> @@ -648,6 +651,7 @@ void check_unaligned_access(int cpu)
>> static int __init check_unaligned_access_boot_cpu(void)
>> {
>> check_unaligned_access(0);
>> + unaligned_emulation_finish();
>> return 0;
>> }
>>
>> diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
>> index 1b8da4e40a4d..5d9858d6ad26 100644
>> --- a/arch/riscv/kernel/smpboot.c
>> +++ b/arch/riscv/kernel/smpboot.c
>> @@ -245,8 +245,8 @@ asmlinkage __visible void smp_callin(void)
>> riscv_ipi_enable();
>>
>> numa_add_cpu(curr_cpuid);
>> - set_cpu_online(curr_cpuid, 1);
>> check_unaligned_access(curr_cpuid);
>> + set_cpu_online(curr_cpuid, 1);
>>
>> if (has_vector()) {
>> if (riscv_v_setup_vsize())
>> diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
>> index b5fb1ff078e3..d99b95084b6c 100644
>> --- a/arch/riscv/kernel/traps_misaligned.c
>> +++ b/arch/riscv/kernel/traps_misaligned.c
>> @@ -14,6 +14,8 @@
>> #include <asm/ptrace.h>
>> #include <asm/csr.h>
>> #include <asm/entry-common.h>
>> +#include <asm/hwprobe.h>
>> +#include <asm/cpufeature.h>
>>
>> #define INSN_MATCH_LB 0x3
>> #define INSN_MASK_LB 0x707f
>> @@ -396,6 +398,8 @@ union reg_data {
>> u64 data_u64;
>> };
>>
>> +static bool unaligned_ctl __read_mostly;
>> +
>> /* sysctl hooks */
>> int unaligned_enabled __read_mostly = 1; /* Enabled by default */
>>
>> @@ -409,6 +413,8 @@ int handle_misaligned_load(struct pt_regs *regs)
>>
>> perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
>>
>> + *this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_EMULATED;
>> +
>> if (!unaligned_enabled)
>> return -1;
>>
>> @@ -585,3 +591,53 @@ int handle_misaligned_store(struct pt_regs *regs)
>>
>> return 0;
>> }
>> +
>> +bool check_unaligned_access_emulated(int cpu)
>> +{
>> + long *mas_ptr = per_cpu_ptr(&misaligned_access_speed, cpu);
>> + unsigned long tmp_var, tmp_val;
>> + bool misaligned_emu_detected;
>> +
>> + *mas_ptr = RISCV_HWPROBE_MISALIGNED_UNKNOWN;
>> +
>> + __asm__ __volatile__ (
>> + " "REG_L" %[tmp], 1(%[ptr])\n"
>> + : [tmp] "=r" (tmp_val) : [ptr] "r" (&tmp_var) : "memory");
>> +
>> + misaligned_emu_detected = (*mas_ptr == RISCV_HWPROBE_MISALIGNED_EMULATED);
>> + /*
>> + * If unaligned_ctl is already set, this means that we detected that all
>> + * CPUS uses emulated misaligned access at boot time. If that changed
>> + * when hotplugging the new cpu, this is something we don't handle.
>> + */
>> + if (unlikely(unaligned_ctl && !misaligned_emu_detected)) {
>> + pr_crit("CPU misaligned accesses non homogeneous (expected all emulated)\n");
>> + while (true)
>> + cpu_relax();
>
> So the idea is to spin long enough that the
> wait_for_completion(&cpu_running, 1000ms) times out? Maybe there
> should be a wfi() in here as well so we're not just burning white hot.

Hi Evan,

Yes, the idea is to let the timeout expire. We could potentially add some
shared state to report an error during bring-up and thus avoid waiting
until the timeout ends. I'll check that.
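
Roughly, I was thinking of something along these lines (the
unaligned_boot_error flag and where it is polled are hypothetical, just to
illustrate the idea):

	/* Hypothetical flag set by the incoming CPU before parking itself. */
	static atomic_t unaligned_boot_error = ATOMIC_INIT(0);

	/* In check_unaligned_access_emulated(), instead of only spinning: */
	if (unlikely(unaligned_ctl && !misaligned_emu_detected)) {
		pr_crit("CPU misaligned accesses non homogeneous (expected all emulated)\n");
		atomic_set(&unaligned_boot_error, 1);
		while (true)
			cpu_relax();
	}

	/*
	 * The boot CPU could then poll unaligned_boot_error alongside
	 * cpu_running in __cpu_up() and fail early instead of waiting for
	 * the full wait_for_completion_timeout() to expire.
	 */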

Regarding the wfi(), the cpu_relax() call is actually meant to do that.
On RISC-V implementations, it ends up as a pause instruction if Zihintpause
is available.
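
For reference, the RISC-V cpu_relax() is roughly the following (trimmed
from arch/riscv/include/asm/vdso/processor.h; the exact guards may differ
between kernel versions):

	static inline void cpu_relax(void)
	{
	#ifdef CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE
		/* Zihintpause: hint the hart to back off while spinning. */
		__asm__ __volatile__ ("pause");
	#else
		/* Raw encoding of the pause instruction for older toolchains. */
		__asm__ __volatile__ (".4byte 0x100000F");
	#endif
		barrier();
	}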

> Have you verified that if we get here, the CPU will also get taken
> back down after the timeout? I wonder if __cpu_up() also needs a call
> to stop the CPU, in the case where that wait_for_completion_timeout()
> times out.

I actually checked that the CPU is not brought up, but I did not check the
complete path to verify that it is correctly brought down. I'll check that.

>
> It also might be more intuitive to reorganize this such that the death
> loop happens in smp_callin(), as check_unaligned_access_emulated() is
> not a function you'd expect might sometimes never return.

Yes, that makes sense.
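
Something along these lines, I guess (just a sketch of the reorganization,
reusing the helpers declared by this patch):

	/* In smp_callin(), before marking the CPU online: */
	if (unaligned_ctl_available() &&
	    !check_unaligned_access_emulated(curr_cpuid)) {
		/*
		 * Non homogeneous emulation support: park the hart here so
		 * that __cpu_up() hits its completion timeout.
		 */
		pr_crit("CPU%u misaligned accesses non homogeneous (expected all emulated)\n",
			curr_cpuid);
		while (true)
			cpu_relax();
	}

That way check_unaligned_access_emulated() only reports whether the access
trapped, and the "never returns" behaviour stays visible in smp_callin().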

Clément

>
> -Evan