[PATCH] x86/MCE, EDAC/mce_amd: Save all aux registers on SMCA systems
From: Yazen Ghannam
Date: Mon Apr 02 2018 - 15:57:30 EST
From: Yazen Ghannam <yazen.ghannam@xxxxxxx>
The Intel SDM and AMD APM both state that the auxiliary MCA registers
should be read if their respective valid bits are set in MCA_STATUS.
The Processor Programming Reference for AMD Fam17h systems has a new
recommendation that the auxiliary registers should be saved
unconditionally. This recommendation can be retroactively applied to
older AMD systems. However, we only need to apply this to SMCA systems
to avoid modifying behavior on older systems.
Define a separate function to save all auxiliary registers on SMCA
systems. Call this function from both the MCE handlers and the AMD LVT
interrupt handlers so that we don't duplicate code.
Print all auxiliary registers in EDAC/mce_amd. Don't restrict this to
SMCA systems in order to save a conditional and keep the format similar
between SMCA and non-SMCA systems.
Signed-off-by: Yazen Ghannam <yazen.ghannam@xxxxxxx>
---
Links:
https://lkml.kernel.org/r/20180326191526.64314-1-Yazen.Ghannam@xxxxxxx
https://lkml.kernel.org/r/20180326191526.64314-2-Yazen.Ghannam@xxxxxxx
arch/x86/kernel/cpu/mcheck/mce-internal.h | 6 +++
arch/x86/kernel/cpu/mcheck/mce.c | 20 ++--------
arch/x86/kernel/cpu/mcheck/mce_amd.c | 65 +++++++++++++++++++++----------
drivers/edac/mce_amd.c | 12 ++----
4 files changed, 57 insertions(+), 46 deletions(-)
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 374d1aa66952..67a2c7c095ca 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -59,6 +59,12 @@ static inline void mce_intel_hcpu_update(unsigned long cpu) { }
static inline void cmci_disable_bank(int bank) { }
#endif
+#ifdef CONFIG_X86_MCE_AMD
+bool smca_read_aux(struct mce *m, int bank);
+#else
+static inline bool smca_read_aux(struct mce *m, int bank) { return false; }
+#endif
+
void mce_timer_kick(unsigned long interval);
#ifdef CONFIG_ACPI_APEI
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 42cf2880d0ed..6be63e9e067d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -639,6 +639,9 @@ static struct notifier_block mce_default_nb = {
*/
static void mce_read_aux(struct mce *m, int i)
{
+ if (smca_read_aux(m, i))
+ return;
+
if (m->status & MCI_STATUS_MISCV)
m->misc = mce_rdmsrl(msr_ops.misc(i));
@@ -653,23 +656,6 @@ static void mce_read_aux(struct mce *m, int i)
m->addr >>= shift;
m->addr <<= shift;
}
-
- /*
- * Extract [55:<lsb>] where lsb is the least significant
- * *valid* bit of the address bits.
- */
- if (mce_flags.smca) {
- u8 lsb = (m->addr >> 56) & 0x3f;
-
- m->addr &= GENMASK_ULL(55, lsb);
- }
- }
-
- if (mce_flags.smca) {
- m->ipid = mce_rdmsrl(MSR_AMD64_SMCA_MCx_IPID(i));
-
- if (m->status & MCI_STATUS_SYNDV)
- m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i));
}
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index f7666eef4a87..b00d5fff1848 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -244,6 +244,47 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
}
}
+
+static bool _smca_read_aux(struct mce *m, int bank, bool read_addr)
+{
+ if (!mce_flags.smca)
+ return false;
+
+ rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m->ipid);
+ rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m->synd);
+
+ /*
+ * We should already have a value if we're coming from the Threshold LVT
+ * interrupt handler. Otherwise, read it now.
+ */
+ if (!m->misc)
+ rdmsrl(msr_ops.misc(bank), m->misc);
+
+ /*
+ * Read MCA_ADDR if we don't have it already. We should already have it
+ * if we're coming from the interrupt handlers.
+ */
+ if (read_addr)
+ rdmsrl(msr_ops.addr(bank), m->addr);
+
+ /*
+ * Extract [55:<lsb>] where lsb is the least significant
+ * *valid* bit of the address bits.
+ */
+ if (m->addr) {
+ u8 lsb = (m->addr >> 56) & 0x3f;
+
+ m->addr &= GENMASK_ULL(55, lsb);
+ }
+
+ return true;
+}
+
+bool smca_read_aux(struct mce *m, int bank)
+{
+ return _smca_read_aux(m, bank, true);
+}
+
struct thresh_restart {
struct threshold_block *b;
int reset;
@@ -799,30 +840,12 @@ static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
mce_setup(&m);
m.status = status;
+ m.addr = addr;
m.misc = misc;
m.bank = bank;
m.tsc = rdtsc();
- if (m.status & MCI_STATUS_ADDRV) {
- m.addr = addr;
-
- /*
- * Extract [55:<lsb>] where lsb is the least significant
- * *valid* bit of the address bits.
- */
- if (mce_flags.smca) {
- u8 lsb = (m.addr >> 56) & 0x3f;
-
- m.addr &= GENMASK_ULL(55, lsb);
- }
- }
-
- if (mce_flags.smca) {
- rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m.ipid);
-
- if (m.status & MCI_STATUS_SYNDV)
- rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m.synd);
- }
+ _smca_read_aux(&m, bank, false);
mce_log(&m);
}
@@ -849,7 +872,7 @@ _log_error_bank(unsigned int bank, u32 msr_stat, u32 msr_addr, u64 misc)
if (!(status & MCI_STATUS_VAL))
return false;
- if (status & MCI_STATUS_ADDRV)
+ if ((status & MCI_STATUS_ADDRV) || mce_flags.smca)
rdmsrl(msr_addr, addr);
__log_error(bank, status, addr, misc);
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index 2ab4d61ee47e..d7badf80f13f 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -990,16 +990,12 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
pr_cont("]: 0x%016llx\n", m->status);
- if (m->status & MCI_STATUS_ADDRV)
- pr_emerg(HW_ERR "Error Addr: 0x%016llx\n", m->addr);
+ pr_emerg(HW_ERR "Error Addr: 0x%016llx, Misc: 0x%016llx\n",
+ m->addr, m->misc);
if (boot_cpu_has(X86_FEATURE_SMCA)) {
- pr_emerg(HW_ERR "IPID: 0x%016llx", m->ipid);
-
- if (m->status & MCI_STATUS_SYNDV)
- pr_cont(", Syndrome: 0x%016llx", m->synd);
-
- pr_cont("\n");
+ pr_emerg(HW_ERR "IPID: 0x%016llx, Syndrome: 0x%016llx\n",
+ m->ipid, m->synd);
decode_smca_error(m);
goto err_code;
--
2.14.1