[PATCH] arm64/elf_hwcap: Add new flags for BFloat-16 extension

From: Anshuman Khandual
Date: Thu Dec 12 2019 - 05:06:44 EST


Expose the availability of the BFloat16 (BF16) format support in the CPUs.
BF16 is a new 16-bit floating point format different from the half
precision format defined by the IEEE-754-2008.

BF16 extensions add support for new instructions for both FP/SIMD and SVE.
Advertise these features individually to the userspace via ELF HWCAP.

Cc: Catalin Marinas <catalin.marinas@xxxxxxx>
Cc: Will Deacon <will@xxxxxxxxxx>
Cc: Mark Rutland <mark.rutland@xxxxxxx>
Cc: Suzuki K Poulose <suzuki.poulose@xxxxxxx>
Cc: linux-doc@xxxxxxxxxxxxxxx
Cc: linux-kernel@xxxxxxxxxxxxxxx
Signed-off-by: Anshuman Khandual <anshuman.khandual@xxxxxxx>
---
Documentation/arm64/elf_hwcaps.rst | 7 +++++++
arch/arm64/include/asm/hwcap.h | 2 ++
arch/arm64/include/asm/sysreg.h | 4 ++++
arch/arm64/include/uapi/asm/hwcap.h | 2 ++
arch/arm64/kernel/cpufeature.c | 5 +++++
arch/arm64/kernel/cpuinfo.c | 2 ++
6 files changed, 22 insertions(+)

diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst
index 7fa3d21..776b2fe 100644
--- a/Documentation/arm64/elf_hwcaps.rst
+++ b/Documentation/arm64/elf_hwcaps.rst
@@ -204,6 +204,13 @@ HWCAP2_FRINT

Functionality implied by ID_AA64ISAR1_EL1.FRINTTS == 0b0001.

+HWCAP2_BF16
+
+ Functionality implied by ID_AA64ISAR1_EL1.BF16 == 0b0001.
+
+HWCAP2_SVEBF16
+
+ Functionality implied by ID_AA64ZFR0_EL1.BF16 == 0b0001.

4. Unused AT_HWCAP bits
-----------------------
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 3d2f247..bd8bf48 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -86,6 +86,8 @@
#define KERNEL_HWCAP_SVESM4 __khwcap2_feature(SVESM4)
#define KERNEL_HWCAP_FLAGM2 __khwcap2_feature(FLAGM2)
#define KERNEL_HWCAP_FRINT __khwcap2_feature(FRINT)
+#define KERNEL_HWCAP_BF16 __khwcap2_feature(BF16)
+#define KERNEL_HWCAP_SVEBF16 __khwcap2_feature(SVEBF16)

/*
* This yields a mask that user programs can use to figure out what
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 6e919fa..6db3a9b 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -553,6 +553,7 @@
#define ID_AA64ISAR0_AES_SHIFT 4

/* id_aa64isar1 */
+#define ID_AA64ISAR1_BF16_SHIFT 44
#define ID_AA64ISAR1_SB_SHIFT 36
#define ID_AA64ISAR1_FRINTTS_SHIFT 32
#define ID_AA64ISAR1_GPI_SHIFT 28
@@ -564,6 +565,7 @@
#define ID_AA64ISAR1_APA_SHIFT 4
#define ID_AA64ISAR1_DPB_SHIFT 0

+#define ID_AA64ISAR1_BF16 0x1
#define ID_AA64ISAR1_APA_NI 0x0
#define ID_AA64ISAR1_APA_ARCHITECTED 0x1
#define ID_AA64ISAR1_API_NI 0x0
@@ -607,12 +609,14 @@
/* id_aa64zfr0 */
#define ID_AA64ZFR0_SM4_SHIFT 40
#define ID_AA64ZFR0_SHA3_SHIFT 32
+#define ID_AA64ZFR0_BF16_SHIFT 20
#define ID_AA64ZFR0_BITPERM_SHIFT 16
#define ID_AA64ZFR0_AES_SHIFT 4
#define ID_AA64ZFR0_SVEVER_SHIFT 0

#define ID_AA64ZFR0_SM4 0x1
#define ID_AA64ZFR0_SHA3 0x1
+#define ID_AA64ZFR0_BF16 0x1
#define ID_AA64ZFR0_BITPERM 0x1
#define ID_AA64ZFR0_AES 0x1
#define ID_AA64ZFR0_AES_PMULL 0x2
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index a1e72886..095090c 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -65,5 +65,7 @@
#define HWCAP2_SVESM4 (1 << 6)
#define HWCAP2_FLAGM2 (1 << 7)
#define HWCAP2_FRINT (1 << 8)
+#define HWCAP2_BF16 (1 << 9)
+#define HWCAP2_SVEBF16 (1 << 10)

#endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 04cf64e..f344cea 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -149,6 +149,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_APA_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_DPB_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_BF16_SHIFT, 4, 0),
ARM64_FTR_END,
};

@@ -186,6 +187,8 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = {
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_AES_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SVEVER_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_BF16_SHIFT, 4, 0),
ARM64_FTR_END,
};

@@ -1652,6 +1655,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FRINTTS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT),
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB),
HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_BF16_SHIFT, FTR_UNSIGNED, ID_AA64ISAR1_BF16, CAP_HWCAP, KERNEL_HWCAP_BF16),
#ifdef CONFIG_ARM64_SVE
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, KERNEL_HWCAP_SVE),
HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SVEVER_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SVEVER_SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2),
@@ -1660,6 +1664,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BITPERM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_BITPERM, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM),
HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SHA3_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SHA3, CAP_HWCAP, KERNEL_HWCAP_SVESHA3),
HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SM4_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SM4, CAP_HWCAP, KERNEL_HWCAP_SVESM4),
+ HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BF16_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_BF16, CAP_HWCAP, KERNEL_HWCAP_SVEBF16),
#endif
HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS),
#ifdef CONFIG_ARM64_PTR_AUTH
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 56bba74..10121f5 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -84,6 +84,8 @@ static const char *const hwcap_str[] = {
"svesm4",
"flagm2",
"frint",
+ "bf16",
+ "svebf16",
NULL
};

--
2.7.4