[PATCH] x86/mce: Suppress spurious corrected errors from errata HSD131, HSM142, HSW131, and BDM48

From: Prarit Bhargava
Date: Wed Feb 05 2020 - 07:58:43 EST


Intel Errata HSD131, HSM142, HSW131, and BDM48 report that
"spurious corrected errors may be logged in the IA32_MC0_STATUS register
with the valid field (bit 63) set, the uncorrected error field (bit 61)
not set, a Model Specific Error Code (bits [31:16]) of 0x000F, and
an MCA Error Code (bits [15:0]) of 0x0005."

Block these spurious errors from the console and logs.

Links to Intel Specification updates:
HSD131: https://www.intel.com/content/www/us/en/products/docs/processors/core/4th-gen-core-family-desktop-specification-update.html
HSM142: https://www.intel.com/content/www/us/en/products/docs/processors/core/4th-gen-core-family-mobile-specification-update.html
HSW131: https://www.intel.com/content/www/us/en/processors/xeon/xeon-e3-1200v3-spec-update.html
BDM48: https://www.intel.com/content/www/us/en/products/docs/processors/core/5th-gen-core-family-spec-update.html

Signed-off-by: Alexander Krupp <centos@xxxxxxxxxxxx>
Signed-off-by: Prarit Bhargava <prarit@xxxxxxxxxx>
Cc: Tony Luck <tony.luck@xxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
Cc: x86@xxxxxxxxxx
Cc: linux-edac@xxxxxxxxxxxxxxx
---
arch/x86/kernel/cpu/mce/core.c | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 2c4f949611e4..d893cc764a06 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -121,6 +121,8 @@ static struct irq_work mce_irq_work;

static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);

+static int (*quirk_noprint)(struct mce *m);	/* non-zero: __print_mce() skips @m */
+
/*
* CPU/chipset specific EDAC code can register a notifier call here to print
* MCE errors in a human-readable form.
@@ -232,6 +234,9 @@ struct mca_msr_regs msr_ops = {

static void __print_mce(struct mce *m)
{
+ if (quirk_noprint && quirk_noprint(m))
+ return;
+
pr_emerg(HW_ERR "CPU %d: Machine Check%s: %Lx Bank %d: %016Lx\n",
m->extcpu,
(m->mcgstatus & MCG_STATUS_MCIP ? " Exception" : ""),
@@ -1622,6 +1627,15 @@ static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
m->cs = regs->cs;
}

+/* Non-zero for bank 0 with VAL set, UC clear, MSCOD 0x000f, MCACOD 0x0005. */
+static int quirk_spurious_ce_noprint(struct mce *m)
+{
+	if (m->bank != 0)
+		return 0;
+
+	return (m->status & 0xa0000000ffffffff) == 0x80000000000f0005;
+}
+
/* Add per CPU specific workarounds here */
static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
{
@@ -1696,6 +1710,13 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)

if (c->x86 == 6 && c->x86_model == 45)
quirk_no_way_out = quirk_sandybridge_ifu;
+
+		/* Haswell: 0x3c, 0x45, 0x46; Broadwell: 0x3d. */
+		if (c->x86 == 6 && (c->x86_model == 0x3c || c->x86_model == 0x3d ||
+				    c->x86_model == 0x45 || c->x86_model == 0x46)) {
+			pr_info("MCE errata HSD131, HSM142, HSW131, or BDM48 enabled.\n");
+			quirk_noprint = quirk_spurious_ce_noprint;
+		}
}

if (c->x86_vendor == X86_VENDOR_ZHAOXIN) {
--
2.21.1