Re: [PATCH] arm64: Add ARM64_HAS_LSE2 CPU capability

From: tiantao (H)
Date: Fri Sep 06 2024 - 06:58:49 EST



在 2024/9/6 17:44, Mark Rutland 写道:
On Fri, Sep 06, 2024 at 05:08:12PM +0800, Tian Tao wrote:
When FEAT_LSE2 is implemented, bit 6 of SCTLR_ELx is nAA, which stands for
Not-aligned access. The nAA bit has two values:
0b0 Unaligned accesses by the specified instructions generate an
Alignment fault.
0b1 Unaligned accesses by the specified instructions do not generate
an Alignment fault.

This patch sets the nAA bit to 1. The following instructions will then not
generate an Alignment fault when the bytes being accessed are not all within
a single 16-byte quantity:
• LDAPR, LDAPRH, LDAPUR, LDAPURH, LDAPURSH, LDAPURSW, LDAR, LDARH, LDLAR,
LDLARH.
• STLLR, STLLRH, STLR, STLRH, STLUR, and STLURH

Signed-off-by: Tian Tao <tiantao6@xxxxxxxxxxxxx>
What is going to depend on this? Nothing in the kernel depends on being
able to make unaligned accesses with these instructions, and (since you
haven't added a HWCAP), userspace has no idea that these accesses won't
generate an alignment fault.

Mark.

I've come across a situation where the simplified code is as follows:

 long  address = (long) mmap(NULL,1024*1024*2,PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,-1,0);

long new_address = address + 9;

 long *p = (long*) new_address;
 long v = -1;

 __atomic_store(p, &v, __ATOMIC_RELEASE);


A core dump occurs after executing __atomic_store, but the user code can't be changed,

so I'm trying to enable nAA to solve this problem.

---
arch/arm64/Kconfig | 10 ++++++++++
arch/arm64/include/asm/sysreg.h | 1 +
arch/arm64/kernel/cpufeature.c | 18 ++++++++++++++++++
arch/arm64/tools/cpucaps | 1 +
4 files changed, 30 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 77d7ef0b16c2..7afe73ebcd79 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -2023,6 +2023,16 @@ config ARM64_TLB_RANGE
The feature introduces new assembly instructions, and they were
support when binutils >= 2.30.
+config ARM64_LSE2_NAA
+ bool "Enable support for not-aligned access"
+ depends on AS_HAS_ARMV8_4
+ help
+ LSE2 is an extension to the original LSE (Large System Extensions) feature,
+ introduced in ARMv8.4.
+
+ Enabling this feature stops the affected instructions from generating an Alignment
+ fault when the bytes being accessed are not all within a single 16-byte quantity.
+
endmenu # "ARMv8.4 architectural features"
menu "ARMv8.5 architectural features"
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 8cced8aa75a9..42e3a1959aa8 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -854,6 +854,7 @@
#define SCTLR_ELx_ENDB (BIT(13))
#define SCTLR_ELx_I (BIT(12))
#define SCTLR_ELx_EOS (BIT(11))
+#define SCTLR_ELx_nAA (BIT(6))
#define SCTLR_ELx_SA (BIT(3))
#define SCTLR_ELx_C (BIT(2))
#define SCTLR_ELx_A (BIT(1))
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 646ecd3069fd..558869a7c7f0 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -2299,6 +2299,14 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
}
#endif /* CONFIG_ARM64_MTE */
+#ifdef CONFIG_ARM64_LSE2_NAA
+static void cpu_enable_lse2(const struct arm64_cpu_capabilities *__unused)
+{
+ sysreg_clear_set(sctlr_el2, SCTLR_ELx_nAA, SCTLR_ELx_nAA);
+ isb();
+}
+#endif
+
static void user_feature_fixup(void)
{
if (cpus_have_cap(ARM64_WORKAROUND_2658417)) {
@@ -2427,6 +2435,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
ARM64_CPUID_FIELDS(ID_AA64ISAR0_EL1, ATOMIC, IMP)
},
#endif /* CONFIG_ARM64_LSE_ATOMICS */
+#ifdef CONFIG_ARM64_LSE2_NAA
+ {
+ .desc = "Support for not-aligned access",
+ .capability = ARM64_HAS_LSE2,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ .cpu_enable = cpu_enable_lse2,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, AT, IMP)
+ },
+#endif
{
.desc = "Virtualization Host Extensions",
.capability = ARM64_HAS_VIRT_HOST_EXTN,
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index ac3429d892b9..0c7c0a293574 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -41,6 +41,7 @@ HAS_HCX
HAS_LDAPR
HAS_LPA2
HAS_LSE_ATOMICS
+HAS_LSE2
HAS_MOPS
HAS_NESTED_VIRT
HAS_PAN
--
2.33.0


.