[PATCH v2 1/1] arm64: Add workaround for Fujitsu A64FX erratum 010001

From: Zhang, Lei
Date: Tue Jan 22 2019 - 04:05:55 EST


On some variants of the Fujitsu-A64FX cores ver(1.0, 1.1),
memory accesses may cause undefined fault (Data abort,
DFSC=0b111111) due to the CPU Errata (Fujitsu #010001).

This patch introduces the workaround to the problem.
The workaround is to change the fault handler for Data abort
DFSC=0b111111 to ignore this undefined fault, which will only
affect the Fujitsu-A64FX.

Signed-off-by: Lei Zhang <zhang.lei@xxxxxxxxxxxxxx>
Tested-by: Lei Zhang <zhang.lei@xxxxxxxxxxxxxx>
---
Documentation/arm64/silicon-errata.txt | 1 +
arch/arm64/Kconfig | 13 +++++++++++++
arch/arm64/include/asm/cpucaps.h | 3 ++-
arch/arm64/include/asm/cputype.h | 4 ++++
arch/arm64/kernel/cpu_errata.c | 8 ++++++++
arch/arm64/mm/fault.c | 24 +++++++++++++++++++++++-
6 files changed, 51 insertions(+), 2 deletions(-)

diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
index 1f09d04..26d64e9 100644
--- a/Documentation/arm64/silicon-errata.txt
+++ b/Documentation/arm64/silicon-errata.txt
@@ -80,3 +80,4 @@ stable kernels.
| Qualcomm Tech. | Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009 |
| Qualcomm Tech. | QDF2400 ITS | E0065 | QCOM_QDF2400_ERRATUM_0065 |
| Qualcomm Tech. | Falkor v{1,2} | E1041 | QCOM_FALKOR_ERRATUM_1041 |
+| Fujitsu | A64FX | E#010001 | FUJITSU_ERRATUM_010001 |
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index a4168d3..9c09b2b 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -643,6 +643,19 @@ config QCOM_FALKOR_ERRATUM_E1041

If unsure, say Y.

+config FUJITSU_ERRATUM_010001
+ bool "Fujitsu-A64FX erratum E#010001: Undefined fault may occur wrongly"
+ default y
+ help
+ This option adds workaround for Fujitsu-A64FX erratum E#010001.
+ On some variants of the Fujitsu-A64FX cores ver(1.0, 1.1), memory accesses
+ may cause undefined fault (Data abort, DFSC=0b111111).
+ The workaround is to replace the fault handler for Data abort DFSC=0b111111
+ with a new one to ignore this undefined fault, which will only affect
+ the Fujitsu-A64FX.
+
+ If unsure, say Y.
+
endmenu


diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 82e9099..3a0b375 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -60,7 +60,8 @@
#define ARM64_HAS_ADDRESS_AUTH_IMP_DEF 39
#define ARM64_HAS_GENERIC_AUTH_ARCH 40
#define ARM64_HAS_GENERIC_AUTH_IMP_DEF 41
+#define ARM64_WORKAROUND_FUJITSU_A64FX_0100001 42

-#define ARM64_NCAPS 42
+#define ARM64_NCAPS 43

#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 951ed1a..70203f9 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -76,6 +76,7 @@
#define ARM_CPU_IMP_BRCM 0x42
#define ARM_CPU_IMP_QCOM 0x51
#define ARM_CPU_IMP_NVIDIA 0x4E
+#define ARM_CPU_IMP_FUJITSU 0x46

#define ARM_CPU_PART_AEM_V8 0xD0F
#define ARM_CPU_PART_FOUNDATION 0xD00
@@ -104,6 +105,8 @@
#define NVIDIA_CPU_PART_DENVER 0x003
#define NVIDIA_CPU_PART_CARMEL 0x004

+#define FUJITSU_CPU_PART_A64FX 0x001
+
#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
#define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72)
@@ -122,6 +125,7 @@
#define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO)
#define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER)
#define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL)
+#define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)

#ifndef __ASSEMBLY__

diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 9950bb0..fc0737f 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -739,6 +739,14 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 2, 0),
},
#endif
+#ifdef CONFIG_FUJITSU_ERRATUM_010001
+ {
+ .desc = "Fujitsu erratum 010001",
+ .capability = ARM64_WORKAROUND_FUJITSU_A64FX_0100001,
+ ERRATA_MIDR_RANGE(MIDR_FUJITSU_A64FX, 0, 0, 1, 0),
+ },
+#endif
+
{
}
};
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index efb7b2c..37e4f18 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -666,6 +666,28 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
return 0;
}

+static int do_bad_unknown_63(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+{
+ /*
+ * On some variants of the Fujitsu-A64FX cores ver(1.0, 1.1),
+ * memory accesses may spuriously trigger data aborts with
+ * DFSC=0b111111.
+ */
+ if (IS_ENABLED(CONFIG_FUJITSU_ERRATUM_010001)) {
+ if (cpus_have_cap(ARM64_WORKAROUND_FUJITSU_A64FX_0100001)) {
+ return 0;
+ } else { /* cpu capabilities maybe not ready*/
+ unsigned int current_cpu_midr = read_cpuid_id();
+ const struct midr_range fujitsu_a64fx_midr_range = {
+ MIDR_FUJITSU_A64FX, MIDR_CPU_VAR_REV(0, 0), MIDR_CPU_VAR_REV(1, 0)
+ };
+ if (is_midr_in_range(current_cpu_midr, &fujitsu_a64fx_midr_range) == TRUE)
+ return 0;
+ }
+ }
+ return do_bad(addr, esr, regs);
+}
+
static const struct fault_info fault_info[] = {
{ do_bad, SIGKILL, SI_KERNEL, "ttbr address size fault" },
{ do_bad, SIGKILL, SI_KERNEL, "level 1 address size fault" },
@@ -730,7 +752,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{ do_bad, SIGKILL, SI_KERNEL, "unknown 60" },
{ do_bad, SIGKILL, SI_KERNEL, "section domain fault" },
{ do_bad, SIGKILL, SI_KERNEL, "page domain fault" },
- { do_bad, SIGKILL, SI_KERNEL, "unknown 63" },
+ { do_bad_unknown_63, SIGKILL, SI_KERNEL, "unknown 63" },
};

int handle_guest_sea(phys_addr_t addr, unsigned int esr)
--
1.8.3.1