Re: [PATCH 2/3] RISC-V: Detect unaligned vector accesses supported.

From: Clément Léger
Date: Mon Jun 10 2024 - 04:24:05 EST




On 06/06/2024 23:29, Charlie Jenkins wrote:
> On Thu, Jun 06, 2024 at 02:32:14PM -0400, Jesse Taube wrote:
>> Run a unaligned vector access to test if the system supports
>> vector unaligned access. Add the result to a new key in hwprobe.
>> This is useful for usermode to know if vector misaligned accesses are
>> supported and if they are faster or slower than equivalent byte accesses.
>>
>> Signed-off-by: Jesse Taube <jesse@xxxxxxxxxxxx>
>> ---
>> arch/riscv/include/asm/cpufeature.h | 2 +
>> arch/riscv/include/asm/hwprobe.h | 2 +-
>> arch/riscv/include/asm/vector.h | 1 +
>> arch/riscv/include/uapi/asm/hwprobe.h | 6 ++
>> arch/riscv/kernel/sys_hwprobe.c | 34 +++++++++
>> arch/riscv/kernel/traps_misaligned.c | 84 ++++++++++++++++++++--
>> arch/riscv/kernel/unaligned_access_speed.c | 4 ++
>> arch/riscv/kernel/vector.c | 2 +-
>> 8 files changed, 129 insertions(+), 6 deletions(-)
>>
>> diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
>> index 347805446151..a012c8490a27 100644
>> --- a/arch/riscv/include/asm/cpufeature.h
>> +++ b/arch/riscv/include/asm/cpufeature.h
>> @@ -35,9 +35,11 @@ void riscv_user_isa_enable(void);
>>
>> #if defined(CONFIG_RISCV_MISALIGNED)
>> bool check_unaligned_access_emulated_all_cpus(void);
>> +bool check_vector_unaligned_access_all_cpus(void);
>> void unaligned_emulation_finish(void);
>> bool unaligned_ctl_available(void);
>> DECLARE_PER_CPU(long, misaligned_access_speed);
>> +DECLARE_PER_CPU(long, vector_misaligned_access);
>> #else
>> static inline bool unaligned_ctl_available(void)
>> {
>> diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h
>> index 630507dff5ea..150a9877b0af 100644
>> --- a/arch/riscv/include/asm/hwprobe.h
>> +++ b/arch/riscv/include/asm/hwprobe.h
>> @@ -8,7 +8,7 @@
>>
>> #include <uapi/asm/hwprobe.h>
>>
>> -#define RISCV_HWPROBE_MAX_KEY 6
>> +#define RISCV_HWPROBE_MAX_KEY 7
>>
>> static inline bool riscv_hwprobe_key_is_valid(__s64 key)
>> {
>> diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
>> index 731dcd0ed4de..776af9b37e23 100644
>> --- a/arch/riscv/include/asm/vector.h
>> +++ b/arch/riscv/include/asm/vector.h
>> @@ -21,6 +21,7 @@
>>
>> extern unsigned long riscv_v_vsize;
>> int riscv_v_setup_vsize(void);
>> +bool insn_is_vector(u32 insn_buf);
>> bool riscv_v_first_use_handler(struct pt_regs *regs);
>> void kernel_vector_begin(void);
>> void kernel_vector_end(void);
>> diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h
>> index 060212331a03..ebacff86f134 100644
>> --- a/arch/riscv/include/uapi/asm/hwprobe.h
>> +++ b/arch/riscv/include/uapi/asm/hwprobe.h
>> @@ -68,6 +68,12 @@ struct riscv_hwprobe {
>> #define RISCV_HWPROBE_MISALIGNED_UNSUPPORTED (4 << 0)
>> #define RISCV_HWPROBE_MISALIGNED_MASK (7 << 0)
>> #define RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE 6
>> +#define RISCV_HWPROBE_VEC_KEY_MISALIGNED_PERF 7
>> +#define RISCV_HWPROBE_VEC_MISALIGNED_UNKNOWN 0
>> +#define RISCV_HWPROBE_VEC_MISALIGNED_SUPPORTED 1
>> +#define RISCV_HWPROBE_VEC_MISALIGNED_SLOW 2
>> +#define RISCV_HWPROBE_VEC_MISALIGNED_FAST 3
>> +#define RISCV_HWPROBE_VEC_MISALIGNED_UNSUPPORTED 4
>> /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
>>
>> /* Flags */
>> diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c
>> index b286b73e763e..ce641cc6e47a 100644
>> --- a/arch/riscv/kernel/sys_hwprobe.c
>> +++ b/arch/riscv/kernel/sys_hwprobe.c
>> @@ -184,6 +184,36 @@ static u64 hwprobe_misaligned(const struct cpumask *cpus)
>> }
>> #endif
>>
>> +#if defined(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS)
>> +static u64 hwprobe_vec_misaligned(const struct cpumask *cpus)
>> +{
>> + int cpu;
>> + u64 perf = -1ULL;
>> +
>> + for_each_cpu(cpu, cpus) {
>> + int this_perf = per_cpu(vector_misaligned_access, cpu);
>> +
>> + if (perf == -1ULL)
>> + perf = this_perf;
>> +
>> + if (perf != this_perf) {
>> + perf = RISCV_HWPROBE_VEC_MISALIGNED_UNKNOWN;
>> + break;
>> + }
>> + }
>> +
>> + if (perf == -1ULL)
>> + return RISCV_HWPROBE_VEC_MISALIGNED_UNKNOWN;
>> +
>> + return perf;
>> +}
>> +#else
>> +static u64 hwprobe_vec_misaligned(const struct cpumask *cpus)
>> +{
>> + return RISCV_HWPROBE_VEC_MISALIGNED_UNKNOWN;
>> +}
>> +#endif
>> +
>> static void hwprobe_one_pair(struct riscv_hwprobe *pair,
>> const struct cpumask *cpus)
>> {
>> @@ -211,6 +241,10 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair,
>> pair->value = hwprobe_misaligned(cpus);
>> break;
>>
>> + case RISCV_HWPROBE_VEC_KEY_MISALIGNED_PERF:
>> + pair->value = hwprobe_vec_misaligned(cpus);
>> + break;
>> +
>> case RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE:
>> pair->value = 0;
>> if (hwprobe_ext0_has(cpus, RISCV_HWPROBE_EXT_ZICBOZ))
>> diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
>> index 2adb7c3e4dd5..8f26c3d92230 100644
>> --- a/arch/riscv/kernel/traps_misaligned.c
>> +++ b/arch/riscv/kernel/traps_misaligned.c
>> @@ -16,6 +16,7 @@
>> #include <asm/entry-common.h>
>> #include <asm/hwprobe.h>
>> #include <asm/cpufeature.h>
>> +#include <asm/vector.h>
>>
>> #define INSN_MATCH_LB 0x3
>> #define INSN_MASK_LB 0x707f
>> @@ -413,10 +414,6 @@ int handle_misaligned_load(struct pt_regs *regs)
>>
>> perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
>>
>> -#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
>> - *this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_EMULATED;
>> -#endif
>> -
>> if (!unaligned_enabled)
>> return -1;
>>
>> @@ -426,6 +423,17 @@ int handle_misaligned_load(struct pt_regs *regs)
>> if (get_insn(regs, epc, &insn))
>> return -1;
>>
>> +#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
>> + if (insn_is_vector(insn) &&
>> + *this_cpu_ptr(&vector_misaligned_access) == RISCV_HWPROBE_VEC_MISALIGNED_SUPPORTED) {
>> + *this_cpu_ptr(&vector_misaligned_access) = RISCV_HWPROBE_VEC_MISALIGNED_UNSUPPORTED;
>> + regs->epc = epc + INSN_LEN(insn);
>> + return 0;
>> + }
>> +
>> + *this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_EMULATED;
>
> This unconditionally sets scalar unaligned accesses even if the
> unaligned access is caused by vector. Scalar unaligned accesses should
> only be set to emulated if this function is entered from a scalar
> unaligned load.
>
> The rest of this function handles how scalar unaligned accesses are
> emulated, and the equivalent needs to happen for vector. You need to add
> routines that manually load the data from the memory address into the
> vector register. When Clément did this for scalar, he provided a test
> case to help reviewers [1]. Please add onto these test cases or make
> your own for vector.
>
> Link: https://github.com/clementleger/unaligned_test [1]

Hi Jesse,

I already mentionned that in a previous message and got no answer [1].
Please address all feedback before sending another version. Adding a
cover letter for such series would also be appreciated.

Thanks,

Clément

Link :https://lore.kernel.org/all/2c355b76-c768-46b1-9f37
fd1991a54bfd@xxxxxxxxxxxx/ [1]

>
>> +#endif
>> +
>> regs->epc = 0;
>>
>> if ((insn & INSN_MASK_LW) == INSN_MATCH_LW) {
>> @@ -625,6 +633,74 @@ static bool check_unaligned_access_emulated(int cpu)
>> return misaligned_emu_detected;
>> }
>>
>> +#ifdef CONFIG_RISCV_ISA_V
>> +static void check_vector_unaligned_access(struct work_struct *unused)
>
> Can you standardize this name with the scalar version by writing
> emulated in it?
>
> "check_vector_unaligned_access_emulated_all_cpus"
>
>> +{
>> + int cpu = smp_processor_id();
>> + long *mas_ptr = this_cpu_ptr(&vector_misaligned_access);
>> + unsigned long tmp_var;
>> +
>> + if (!riscv_isa_extension_available(hart_isa[cpu].isa, v))
>> + return;
>> +
>> + *mas_ptr = RISCV_HWPROBE_VEC_MISALIGNED_SUPPORTED;
>> +
>> + local_irq_enable();
>> + kernel_vector_begin();
>> + __asm__ __volatile__ (
>> + ".balign 4\n\t"
>> + ".option push\n\t"
>> + ".option arch, +v\n\t"
>> + " vsetivli zero, 1, e16, m1, ta, ma\n\t" // Vectors of 16b
>> + " vle16.v v0, (%[ptr])\n\t" // Load bytes
>> + ".option pop\n\t"
>> + : : [ptr] "r" ((u8 *)&tmp_var + 1) : "v0", "memory");
>
> memory is being read from, but not written to, so there is no need to
> have a memory clobber.
>
>> + kernel_vector_end();
>> +
>> + if (*mas_ptr == RISCV_HWPROBE_VEC_MISALIGNED_UNKNOWN)
>> + *mas_ptr = RISCV_HWPROBE_VEC_MISALIGNED_SUPPORTED;
>> +}
>> +
>> +bool check_vector_unaligned_access_all_cpus(void)
>> +{
>> + int cpu;
>> + bool ret = true;
>> +
>> + for_each_online_cpu(cpu)
>> + if (riscv_isa_extension_available(hart_isa[cpu].isa, ZICCLSM))
>
> zicclsm is not specific to vector so it can be extracted out of this
> vector specific function. Assuming that hardware properly reports the
> extension, if zicclsm is present then it is known that both vector and
> scalar unaligned accesses are supported.
>
>> + per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_VEC_MISALIGNED_SUPPORTED;
>
> Please use the exising UNKNOWN terminology instead of renaming to
> SUPPORTED. Any option that is not UNSUPPORTED implies that unaligned
> accesses are supported.
>
>> + else
>> + ret = false;
>> +
>> +
>> + if (ret)
>> + return true;
>> + ret = true;
>> +
>> + schedule_on_each_cpu(check_vector_unaligned_access);
>> +
>> + for_each_online_cpu(cpu)
>> + if (per_cpu(vector_misaligned_access, cpu)
>> + != RISCV_HWPROBE_VEC_MISALIGNED_SUPPORTED)
>> + return false;
>> +
>> + return ret;
>> +}
>> +#else
>
> If CONFIG_RISCV_ISA_V is not set, there is no value in checking if
> vector unaligned accesses are supported because userspace will not be
> allowed to use vector instructions anyway.
>
> - Charlie
>
>> +bool check_vector_unaligned_access_all_cpus(void)
>> +{
>> + int cpu;
>> +
>> + for_each_online_cpu(cpu)
>> + if (riscv_isa_extension_available(hart_isa[cpu].isa, ZICCLSM))
>> + per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_VEC_MISALIGNED_SUPPORTED;
>> + else
>> + per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_VEC_MISALIGNED_UNSUPPORTED;
>> +
>> + return false;
>> +}
>> +#endif
>> +
>> bool check_unaligned_access_emulated_all_cpus(void)
>> {
>> int cpu;
>> diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
>> index a9a6bcb02acf..92a84239beaa 100644
>> --- a/arch/riscv/kernel/unaligned_access_speed.c
>> +++ b/arch/riscv/kernel/unaligned_access_speed.c
>> @@ -20,6 +20,7 @@
>> #define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80)
>>
>> DEFINE_PER_CPU(long, misaligned_access_speed);
>> +DEFINE_PER_CPU(long, vector_misaligned_access) = RISCV_HWPROBE_VEC_MISALIGNED_UNKNOWN;
>>
>> #ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
>> static cpumask_t fast_misaligned_access;
>> @@ -264,6 +265,8 @@ static int check_unaligned_access_all_cpus(void)
>> {
>> bool all_cpus_emulated = check_unaligned_access_emulated_all_cpus();
>>
>> + check_vector_unaligned_access_all_cpus();
>> +
>> if (!all_cpus_emulated)
>> return check_unaligned_access_speed_all_cpus();
>>
>> @@ -273,6 +276,7 @@ static int check_unaligned_access_all_cpus(void)
>> static int check_unaligned_access_all_cpus(void)
>> {
>> check_unaligned_access_emulated_all_cpus();
>> + check_vector_unaligned_access_all_cpus();
>>
>> return 0;
>> }
>> diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c
>> index 6727d1d3b8f2..2cceab739b2c 100644
>> --- a/arch/riscv/kernel/vector.c
>> +++ b/arch/riscv/kernel/vector.c
>> @@ -66,7 +66,7 @@ void __init riscv_v_setup_ctx_cache(void)
>> #endif
>> }
>>
>> -static bool insn_is_vector(u32 insn_buf)
>> +bool insn_is_vector(u32 insn_buf)
>> {
>> u32 opcode = insn_buf & __INSN_OPCODE_MASK;
>> u32 width, csr;
>> --
>> 2.43.0
>>