Re: [RFC PATCH v0] RISCV: Report vector unaligned accesses hwprobe

From: Jesse Taube
Date: Tue Jun 04 2024 - 12:47:43 EST




On 6/4/24 12:24, Jesse Taube wrote:
Detect if a system traps into the kernel on a vector unaligned access.
Add the result to a new key in hwprobe.

Signed-off-by: Jesse Taube <jesse@xxxxxxxxxxxx>
---
arch/riscv/include/asm/cpufeature.h | 3 ++
arch/riscv/include/asm/hwprobe.h | 2 +-
arch/riscv/include/uapi/asm/hwprobe.h | 6 +++
arch/riscv/kernel/sys_hwprobe.c | 34 ++++++++++++
arch/riscv/kernel/traps_misaligned.c | 60 ++++++++++++++++++++++
arch/riscv/kernel/unaligned_access_speed.c | 4 ++
6 files changed, 108 insertions(+), 1 deletion(-)

diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
index 347805446151..5ad69cf25b25 100644
--- a/arch/riscv/include/asm/cpufeature.h
+++ b/arch/riscv/include/asm/cpufeature.h
@@ -35,9 +35,12 @@ void riscv_user_isa_enable(void);
#if defined(CONFIG_RISCV_MISALIGNED)
bool check_unaligned_access_emulated_all_cpus(void);
+bool check_vector_unaligned_access_all_cpus(void);
+
void unaligned_emulation_finish(void);
bool unaligned_ctl_available(void);
DECLARE_PER_CPU(long, misaligned_access_speed);
+DECLARE_PER_CPU(long, vector_misaligned_access);
#else
static inline bool unaligned_ctl_available(void)
{
diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h
index 630507dff5ea..150a9877b0af 100644
--- a/arch/riscv/include/asm/hwprobe.h
+++ b/arch/riscv/include/asm/hwprobe.h
@@ -8,7 +8,7 @@
#include <uapi/asm/hwprobe.h>
-#define RISCV_HWPROBE_MAX_KEY 6
+#define RISCV_HWPROBE_MAX_KEY 7
static inline bool riscv_hwprobe_key_is_valid(__s64 key)
{
diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h
index 060212331a03..4474e98d17bd 100644
--- a/arch/riscv/include/uapi/asm/hwprobe.h
+++ b/arch/riscv/include/uapi/asm/hwprobe.h
@@ -68,6 +68,12 @@ struct riscv_hwprobe {
#define RISCV_HWPROBE_MISALIGNED_UNSUPPORTED (4 << 0)
#define RISCV_HWPROBE_MISALIGNED_MASK (7 << 0)
#define RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE 6
+#define RISCV_HWPROBE_VEC_KEY_MISALIGNED_PERF 7

There was talk of combining the vector and scalar speeds in the user-facing API under RISCV_HWPROBE_KEY_CPUPERF_0, but adding another key seems easier.
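For reference, this is roughly how I'd expect userspace to query the new key (untested sketch; assumes the uapi header from this patch and a kernel that implements the hwprobe syscall):

/*
 * Untested sketch: query RISCV_HWPROBE_VEC_KEY_MISALIGNED_PERF across
 * all online CPUs (cpusetsize == 0 and cpus == NULL means "all CPUs").
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/hwprobe.h>

int main(void)
{
	struct riscv_hwprobe pair = {
		.key = RISCV_HWPROBE_VEC_KEY_MISALIGNED_PERF,
	};

	if (syscall(__NR_riscv_hwprobe, &pair, 1, 0, NULL, 0))
		return 1;

	printf("vector misaligned perf: %llu\n",
	       (unsigned long long)pair.value);
	return 0;
}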

+#define RISCV_HWPROBE_VEC_MISALIGNED_UNKNOWN 0
+#define RISCV_HWPROBE_VEC_MISALIGNED_EMULATED 1
+#define RISCV_HWPROBE_VEC_MISALIGNED_SLOW 2
+#define RISCV_HWPROBE_VEC_MISALIGNED_FAST 3
+#define RISCV_HWPROBE_VEC_MISALIGNED_UNSUPPORTED 4
/* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
/* Flags */
diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c
index b286b73e763e..ce641cc6e47a 100644
--- a/arch/riscv/kernel/sys_hwprobe.c
+++ b/arch/riscv/kernel/sys_hwprobe.c
@@ -184,6 +184,36 @@ static u64 hwprobe_misaligned(const struct cpumask *cpus)
}
#endif
+#if defined(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS)
+static u64 hwprobe_vec_misaligned(const struct cpumask *cpus)
+{
+ int cpu;
+ u64 perf = -1ULL;
+
+ for_each_cpu(cpu, cpus) {
+ int this_perf = per_cpu(vector_misaligned_access, cpu);
+
+ if (perf == -1ULL)
+ perf = this_perf;
+
+ if (perf != this_perf) {
+ perf = RISCV_HWPROBE_VEC_MISALIGNED_UNKNOWN;
+ break;
+ }
+ }
+
+ if (perf == -1ULL)
+ return RISCV_HWPROBE_VEC_MISALIGNED_UNKNOWN;
+
+ return perf;
+}
+#else
+static u64 hwprobe_vec_misaligned(const struct cpumask *cpus)
+{
+ return RISCV_HWPROBE_VEC_MISALIGNED_UNKNOWN;
+}
+#endif
+
static void hwprobe_one_pair(struct riscv_hwprobe *pair,
const struct cpumask *cpus)
{
@@ -211,6 +241,10 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair,
pair->value = hwprobe_misaligned(cpus);
break;
+ case RISCV_HWPROBE_VEC_KEY_MISALIGNED_PERF:
+ pair->value = hwprobe_vec_misaligned(cpus);
+ break;
+
case RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE:
pair->value = 0;
if (hwprobe_ext0_has(cpus, RISCV_HWPROBE_EXT_ZICBOZ))
diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
index 2adb7c3e4dd5..0c07e990e9c5 100644
--- a/arch/riscv/kernel/traps_misaligned.c
+++ b/arch/riscv/kernel/traps_misaligned.c
@@ -16,6 +16,7 @@
#include <asm/entry-common.h>
#include <asm/hwprobe.h>
#include <asm/cpufeature.h>
+#include <asm/vector.h>
#define INSN_MATCH_LB 0x3
#define INSN_MASK_LB 0x707f
@@ -426,6 +427,14 @@ int handle_misaligned_load(struct pt_regs *regs)
if (get_insn(regs, epc, &insn))
return -1;

Is this an appropriate way to check whether vector misaligned accesses are supported? What if an unaligned vector load was issued by the kernel before this check runs?
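One possible refinement (sketch only, not part of this patch): decode the trapping instruction first, so that only genuine vector loads flip the per-CPU state. RVV loads share the LOAD-FP major opcode (0x07) with scalar FP loads, but the width field in bits 14:12 is 1-4 for FLH/FLW/FLD/FLQ and 0 or 5-7 for the vector element widths, so something like this could gate the check below:

/*
 * Sketch only: true if the trapping instruction is a vector load.
 * Vector loads use the LOAD-FP major opcode; scalar FP loads use
 * width values 1-4, while the vector encodings use 0 and 5-7.
 */
#define INSN_OPCODE_MASK	0x7f
#define INSN_OPCODE_LOAD_FP	0x07

static bool insn_is_vector_load(unsigned long insn)
{
	unsigned long width = (insn >> 12) & 0x7;

	if ((insn & INSN_OPCODE_MASK) != INSN_OPCODE_LOAD_FP)
		return false;

	return width == 0 || width >= 5;
}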

+#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
+ if (*this_cpu_ptr(&vector_misaligned_access) == RISCV_HWPROBE_VEC_MISALIGNED_UNKNOWN) {
+ *this_cpu_ptr(&vector_misaligned_access) = RISCV_HWPROBE_VEC_MISALIGNED_UNSUPPORTED;
+ regs->epc = epc + INSN_LEN(insn);
+ return 0;
+ }
+#endif
+
regs->epc = 0;
if ((insn & INSN_MASK_LW) == INSN_MATCH_LW) {
@@ -625,6 +634,57 @@ static bool check_unaligned_access_emulated(int cpu)
return misaligned_emu_detected;
}
+#ifdef CONFIG_RISCV_ISA_V
+static bool check_vector_unaligned_access(int cpu)
+{
+ long *mas_ptr = per_cpu_ptr(&vector_misaligned_access, cpu);
+ struct riscv_isainfo *isainfo = &hart_isa[cpu];
+ unsigned long tmp_var;
+ bool misaligned_vec_supported;
+
+ if (!riscv_isa_extension_available(isainfo->isa, v))
+ return false;
+
+ /* This case will only happen if an unaligned vector load
+ * was issued by the kernel before this check
+ */
+ if (*mas_ptr != RISCV_HWPROBE_VEC_MISALIGNED_UNKNOWN)
+ return false;
+
+ kernel_vector_begin();
+ __asm__ __volatile__ (
+ ".option push\n\t"
+ ".option arch, +v\n\t"
+ " li t1, 0x1\n" //size
+ " vsetvli t0, t1, e16, m2, ta, ma\n\t" // Vectors of 16b
+ " addi t0, %[ptr], 1\n\t" // Misalign address
+ " vle16.v v0, (t0)\n\t" // Load bytes
+ ".option pop\n\t"
+ : : [ptr] "r" (&tmp_var) : "v0", "t0", "t1", "memory");
+ kernel_vector_end();
+
+ misaligned_vec_supported = (*mas_ptr == RISCV_HWPROBE_VEC_MISALIGNED_UNKNOWN);
+
+ return misaligned_vec_supported;
+}
+#else
+static bool check_vector_unaligned_access(int cpu)
+{
+ return false;
+}
+#endif
+
+bool check_vector_unaligned_access_all_cpus(void)
+{
+ int cpu;
+
+ for_each_online_cpu(cpu)
+ if (!check_vector_unaligned_access(cpu))
+ return false;
+
+ return true;
+}
+
bool check_unaligned_access_emulated_all_cpus(void)
{
int cpu;
diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
index a9a6bcb02acf..92a84239beaa 100644
--- a/arch/riscv/kernel/unaligned_access_speed.c
+++ b/arch/riscv/kernel/unaligned_access_speed.c
@@ -20,6 +20,7 @@
#define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80)
DEFINE_PER_CPU(long, misaligned_access_speed);
+DEFINE_PER_CPU(long, vector_misaligned_access) = RISCV_HWPROBE_VEC_MISALIGNED_UNKNOWN;
#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
static cpumask_t fast_misaligned_access;
@@ -264,6 +265,8 @@ static int check_unaligned_access_all_cpus(void)
{
bool all_cpus_emulated = check_unaligned_access_emulated_all_cpus();

There was talk about Zicclsm, but spike doesn't support Zicclsm AFAIK. I was wondering whether I should add Zicclsm to cpufeature as well (rough sketch below).
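For the cpufeature side, I think it would just be a new RISCV_ISA_EXT_ZICCLSM id in asm/hwcap.h plus a table entry in riscv_isa_ext[] in arch/riscv/kernel/cpufeature.c, something like (sketch only, untested):

	/* Sketch: let the ISA-string parser recognize "zicclsm" */
	__RISCV_ISA_EXT_DATA(zicclsm, RISCV_ISA_EXT_ZICCLSM),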

If anyone wants to run this, I tested with:
spike --misaligned --isa=RV64IMAFDCV_zicntr_zihpm --kernel=arch/riscv/boot/Image opensbi/build/platform/generic/firmware/fw_jump.elf


Thanks,
Jesse Taube


+ check_vector_unaligned_access_all_cpus();
+
if (!all_cpus_emulated)
return check_unaligned_access_speed_all_cpus();
@@ -273,6 +276,7 @@ static int check_unaligned_access_all_cpus(void)
static int check_unaligned_access_all_cpus(void)
{
check_unaligned_access_emulated_all_cpus();
+ check_vector_unaligned_access_all_cpus();
return 0;
}