Re: [RFC PATCH v0] RISCV: Report vector unaligned accesses hwprobe
From: Clément Léger
Date: Wed Jun 05 2024 - 03:55:54 EST
On 04/06/2024 18:42, Jesse Taube wrote:
>
>
> On 6/4/24 12:24, Jesse Taube wrote:
>> Detected if a system traps into the kernel on an vector unaligned access.
>> Add the result to a new key in hwprobe.
>>
>> Signed-off-by: Jesse Taube <jesse@xxxxxxxxxxxx>
>> ---
>> arch/riscv/include/asm/cpufeature.h | 3 ++
>> arch/riscv/include/asm/hwprobe.h | 2 +-
>> arch/riscv/include/uapi/asm/hwprobe.h | 6 +++
>> arch/riscv/kernel/sys_hwprobe.c | 34 ++++++++++++
>> arch/riscv/kernel/traps_misaligned.c | 60 ++++++++++++++++++++++
>> arch/riscv/kernel/unaligned_access_speed.c | 4 ++
>> 6 files changed, 108 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/riscv/include/asm/cpufeature.h
>> b/arch/riscv/include/asm/cpufeature.h
>> index 347805446151..5ad69cf25b25 100644
>> --- a/arch/riscv/include/asm/cpufeature.h
>> +++ b/arch/riscv/include/asm/cpufeature.h
>> @@ -35,9 +35,12 @@ void riscv_user_isa_enable(void);
>> #if defined(CONFIG_RISCV_MISALIGNED)
>> bool check_unaligned_access_emulated_all_cpus(void);
>> +bool check_vector_unaligned_access_all_cpus(void);
>> +
>> void unaligned_emulation_finish(void);
>> bool unaligned_ctl_available(void);
>> DECLARE_PER_CPU(long, misaligned_access_speed);
>> +DECLARE_PER_CPU(long, vector_misaligned_access);
>> #else
>> static inline bool unaligned_ctl_available(void)
>> {
>> diff --git a/arch/riscv/include/asm/hwprobe.h
>> b/arch/riscv/include/asm/hwprobe.h
>> index 630507dff5ea..150a9877b0af 100644
>> --- a/arch/riscv/include/asm/hwprobe.h
>> +++ b/arch/riscv/include/asm/hwprobe.h
>> @@ -8,7 +8,7 @@
>> #include <uapi/asm/hwprobe.h>
>> -#define RISCV_HWPROBE_MAX_KEY 6
>> +#define RISCV_HWPROBE_MAX_KEY 7
>> static inline bool riscv_hwprobe_key_is_valid(__s64 key)
>> {
>> diff --git a/arch/riscv/include/uapi/asm/hwprobe.h
>> b/arch/riscv/include/uapi/asm/hwprobe.h
>> index 060212331a03..4474e98d17bd 100644
>> --- a/arch/riscv/include/uapi/asm/hwprobe.h
>> +++ b/arch/riscv/include/uapi/asm/hwprobe.h
>> @@ -68,6 +68,12 @@ struct riscv_hwprobe {
>> #define RISCV_HWPROBE_MISALIGNED_UNSUPPORTED (4 << 0)
>> #define RISCV_HWPROBE_MISALIGNED_MASK (7 << 0)
>> #define RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE 6
>> +#define RISCV_HWPROBE_VEC_KEY_MISALIGNED_PERF 7
>
> There were talks about combining vecotor and scalar speed for the user
> facing API into RISCV_HWPROBE_KEY_CPUPERF_0, but adding another key
> seems easier.
>
>> +#define RISCV_HWPROBE_VEC_MISALIGNED_UNKNOWN 0
>> +#define RISCV_HWPROBE_VEC_MISALIGNED_EMULATED 1
>> +#define RISCV_HWPROBE_VEC_MISALIGNED_SLOW 2
>> +#define RISCV_HWPROBE_VEC_MISALIGNED_FAST 3
>> +#define RISCV_HWPROBE_VEC_MISALIGNED_UNSUPPORTED 4
>> /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
>> /* Flags */
>> diff --git a/arch/riscv/kernel/sys_hwprobe.c
>> b/arch/riscv/kernel/sys_hwprobe.c
>> index b286b73e763e..ce641cc6e47a 100644
>> --- a/arch/riscv/kernel/sys_hwprobe.c
>> +++ b/arch/riscv/kernel/sys_hwprobe.c
>> @@ -184,6 +184,36 @@ static u64 hwprobe_misaligned(const struct
>> cpumask *cpus)
>> }
>> #endif
>> +#if defined(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS)
>> +static u64 hwprobe_vec_misaligned(const struct cpumask *cpus)
>> +{
>> + int cpu;
>> + u64 perf = -1ULL;
>> +
>> + for_each_cpu(cpu, cpus) {
>> + int this_perf = per_cpu(vector_misaligned_access, cpu);
>> +
>> + if (perf == -1ULL)
>> + perf = this_perf;
>> +
>> + if (perf != this_perf) {
>> + perf = RISCV_HWPROBE_VEC_MISALIGNED_UNKNOWN;
>> + break;
>> + }
>> + }
>> +
>> + if (perf == -1ULL)
>> + return RISCV_HWPROBE_VEC_MISALIGNED_UNKNOWN;
>> +
>> + return perf;
>> +}
>> +#else
>> +static u64 hwprobe_vec_misaligned(const struct cpumask *cpus)
>> +{
>> + return RISCV_HWPROBE_VEC_MISALIGNED_UNKNOWN;
>> +}
>> +#endif
>> +
>> static void hwprobe_one_pair(struct riscv_hwprobe *pair,
>> const struct cpumask *cpus)
>> {
>> @@ -211,6 +241,10 @@ static void hwprobe_one_pair(struct riscv_hwprobe
>> *pair,
>> pair->value = hwprobe_misaligned(cpus);
>> break;
>> + case RISCV_HWPROBE_VEC_KEY_MISALIGNED_PERF:
>> + pair->value = hwprobe_vec_misaligned(cpus);
>> + break;
>> +
>> case RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE:
>> pair->value = 0;
>> if (hwprobe_ext0_has(cpus, RISCV_HWPROBE_EXT_ZICBOZ))
>> diff --git a/arch/riscv/kernel/traps_misaligned.c
>> b/arch/riscv/kernel/traps_misaligned.c
>> index 2adb7c3e4dd5..0c07e990e9c5 100644
>> --- a/arch/riscv/kernel/traps_misaligned.c
>> +++ b/arch/riscv/kernel/traps_misaligned.c
>> @@ -16,6 +16,7 @@
>> #include <asm/entry-common.h>
>> #include <asm/hwprobe.h>
>> #include <asm/cpufeature.h>
>> +#include <asm/vector.h>
>> #define INSN_MATCH_LB 0x3
>> #define INSN_MASK_LB 0x707f
>> @@ -426,6 +427,14 @@ int handle_misaligned_load(struct pt_regs *regs)
>> if (get_insn(regs, epc, &insn))
>> return -1;
>>
>
> Is this an appropriate way to check if there is vector missaligned
> access? What if a unaligned vector load as called by the kernel before
> this check?
>
>> +#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
>> + if (*this_cpu_ptr(&vector_misaligned_access) ==
>> RISCV_HWPROBE_VEC_MISALIGNED_UNKNOWN) {
>> + *this_cpu_ptr(&vector_misaligned_access) =
>> RISCV_HWPROBE_VEC_MISALIGNED_UNSUPPORTED;
>> + regs->epc = epc + INSN_LEN(insn);
>> + return 0;
>> + }
>> +#endif
Since this code will be called for standard load/store misaligned
accesses your, I guess this needs to check if the faulty instruction
itself is a vector related one before setting it to supported. I don't
know what the specs says about that but I guess it shoukld be checked if
vector load/store and standard load/store could have different behaviors
with respect to misaligned accesses.
Regardless of that (except if I missed something), the emulation code
does not actually support vector load/store instructions.
So, support for misaligned vector load/store should be added in a first
place before reporting that it is actually "supported".
Thanks,
Clément
>> +
>> regs->epc = 0;
>> if ((insn & INSN_MASK_LW) == INSN_MATCH_LW) {
>> @@ -625,6 +634,57 @@ static bool check_unaligned_access_emulated(int cpu)
>> return misaligned_emu_detected;
>> }
>> +#ifdef CONFIG_RISCV_ISA_V
>> +static bool check_vector_unaligned_access(int cpu)
>> +{
>> + long *mas_ptr = per_cpu_ptr(&vector_misaligned_access, cpu);
>> + struct riscv_isainfo *isainfo = &hart_isa[cpu];
>> + unsigned long tmp_var;
>> + bool misaligned_vec_suported;
>> +
>> + if (!riscv_isa_extension_available(isainfo->isa, v))
>> + return false;
>> +
>> + /* This case will only happen if a unaligned vector load
>> + * was called by the kernel before this check
>> + */
>> + if (*mas_ptr != RISCV_HWPROBE_VEC_MISALIGNED_UNKNOWN)
>> + return false;
>> +
>> + kernel_vector_begin();
>> + __asm__ __volatile__ (
>> + ".option push\n\t"
>> + ".option arch, +v\n\t"
>> + " li t1, 0x1\n" //size
>> + " vsetvli t0, t1, e16, m2, ta, ma\n\t" // Vectors of
>> 16b
>> + " addi t0, %[ptr], 1\n\t" // Misalign address
>> + " vle16.v v0, (t0)\n\t" // Load bytes
>> + ".option pop\n\t"
>> + : : [ptr] "r" (&tmp_var) : "v0", "t0", "t1", "memory");
>> + kernel_vector_end();
>> +
>> + misaligned_vec_suported = (*mas_ptr ==
>> RISCV_HWPROBE_VEC_MISALIGNED_UNKNOWN);
>> +
>> + return misaligned_vec_suported;
>> +}
>> +#else
>> +static bool check_vector_unaligned_access(int cpu)
>> +{
>> + return false;
>> +}
>> +#endif
>> +
>> +bool check_vector_unaligned_access_all_cpus(void)
>> +{
>> + int cpu;
>> +
>> + for_each_online_cpu(cpu)
>> + if (!check_vector_unaligned_access(cpu))
>> + return false;
>> +
>> + return true;
>> +}
>> +
>> bool check_unaligned_access_emulated_all_cpus(void)
>> {
>> int cpu;
>> diff --git a/arch/riscv/kernel/unaligned_access_speed.c
>> b/arch/riscv/kernel/unaligned_access_speed.c
>> index a9a6bcb02acf..92a84239beaa 100644
>> --- a/arch/riscv/kernel/unaligned_access_speed.c
>> +++ b/arch/riscv/kernel/unaligned_access_speed.c
>> @@ -20,6 +20,7 @@
>> #define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80)
>> DEFINE_PER_CPU(long, misaligned_access_speed);
>> +DEFINE_PER_CPU(long, vector_misaligned_access) =
>> RISCV_HWPROBE_VEC_MISALIGNED_UNKNOWN;
>> #ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
>> static cpumask_t fast_misaligned_access;
>> @@ -264,6 +265,8 @@ static int check_unaligned_access_all_cpus(void)
>> {
>> bool all_cpus_emulated =
>> check_unaligned_access_emulated_all_cpus();
>>
>
> There was talks about Zicclsm, but spike doesnt have support for Zicclsm
> afaik, but I was wondering if i should add Zicclsm to cpufeature and
> aswell.
>
> If anyone wants to run this i tested with
> spike --misaligned --isa=RV64IMAFDCV_zicntr_zihpm
> --kernel=arch/riscv/boot/Image
> opensbi/build/platform/generic/firmware/fw_jump.elf
AFAIK, --misaligned tells spike to actually handle misaligned load/store
in "hardware" and not generate a misaligned trap so I guess you would
need to remove that flag and use a specific openSBI version that always
delegate the misaligned load/store traps if you want to be sure that the
kernel actually handles the vector misaligned load/store traps.
I previously made a small test utility to verify that the kernel
correctly gets the misaligned traps [1]. If it fails then, your kernel
probably do not get any trap :)
Link: https://github.com/clementleger/unaligned_test [1]
>
>
> Thanks,
> Jesse Taube
>
>
>> + check_vector_unaligned_access_all_cpus();
>> +
>> if (!all_cpus_emulated)
>> return check_unaligned_access_speed_all_cpus();
>> @@ -273,6 +276,7 @@ static int check_unaligned_access_all_cpus(void)
>> static int check_unaligned_access_all_cpus(void)
>> {
>> check_unaligned_access_emulated_all_cpus();
>> + check_vector_unaligned_access_all_cpus();
>> return 0;
>> }