[PATCH 1/7] x86/mce/AMD: Log Deferred Errors using SMCA MCA_DE{STAT,ADDR} registers

From: Borislav Petkov
Date: Wed May 11 2016 - 09:00:34 EST


From: Yazen Ghannam <Yazen.Ghannam@xxxxxxx>

Scalable MCA provides new registers for all banks for logging deferred
errors: MCA_DESTAT and MCA_DEADDR. Deferred errors are always logged to
these registers.

Update the AMD deferred error handler to use these registers, if
available.

Signed-off-by: Yazen Ghannam <Yazen.Ghannam@xxxxxxx>
Cc: Aravind Gopalakrishnan <aravindksg.lkml@xxxxxxxxx>
Cc: linux-edac <linux-edac@xxxxxxxxxxxxxxx>
Cc: Tony Luck <tony.luck@xxxxxxxxx>
Cc: x86-ml <x86@xxxxxxxxxx>
Link: http://lkml.kernel.org/r/1462377407-1446-2-git-send-email-Yazen.Ghannam@xxxxxxx
[ Sanity-check __log_error() args, massage a bit. ]
Signed-off-by: Borislav Petkov <bp@xxxxxxx>
---
arch/x86/include/asm/mce.h | 4 ++++
arch/x86/kernel/cpu/mcheck/mce_amd.c | 32 ++++++++++++++++++++++++--------
2 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 53ab69704771..8bf766ef0e18 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -110,6 +110,8 @@
#define MSR_AMD64_SMCA_MC0_MISC0 0xc0002003
#define MSR_AMD64_SMCA_MC0_CONFIG 0xc0002004
#define MSR_AMD64_SMCA_MC0_IPID 0xc0002005
+#define MSR_AMD64_SMCA_MC0_DESTAT 0xc0002008
+#define MSR_AMD64_SMCA_MC0_DEADDR 0xc0002009
#define MSR_AMD64_SMCA_MC0_MISC1 0xc000200a
#define MSR_AMD64_SMCA_MCx_CTL(x) (MSR_AMD64_SMCA_MC0_CTL + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_STATUS(x) (MSR_AMD64_SMCA_MC0_STATUS + 0x10*(x))
@@ -117,6 +119,8 @@
#define MSR_AMD64_SMCA_MCx_MISC(x) (MSR_AMD64_SMCA_MC0_MISC0 + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_CONFIG(x) (MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_IPID(x) (MSR_AMD64_SMCA_MC0_IPID + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_DESTAT(x) (MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_DEADDR(x) (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x)))

/*
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index c594680c36f2..d1b1e62f7cb9 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -430,12 +430,23 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
deferred_error_interrupt_enable(c);
}

-static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
+static void
+__log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
{
+ u32 msr_status = msr_ops.status(bank);
+ u32 msr_addr = msr_ops.addr(bank);
struct mce m;
u64 status;

- rdmsrl(msr_ops.status(bank), status);
+ WARN_ON_ONCE(deferred_err && threshold_err);
+
+ if (deferred_err && mce_flags.smca) {
+ msr_status = MSR_AMD64_SMCA_MCx_DESTAT(bank);
+ msr_addr = MSR_AMD64_SMCA_MCx_DEADDR(bank);
+ }
+
+ rdmsrl(msr_status, status);
+
if (!(status & MCI_STATUS_VAL))
return;

@@ -448,10 +459,11 @@ static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
m.misc = misc;

if (m.status & MCI_STATUS_ADDRV)
- rdmsrl(msr_ops.addr(bank), m.addr);
+ rdmsrl(msr_addr, m.addr);

mce_log(&m);
- wrmsrl(msr_ops.status(bank), 0);
+
+ wrmsrl(msr_status, 0);
}

static inline void __smp_deferred_error_interrupt(void)
@@ -479,17 +491,21 @@ asmlinkage __visible void smp_trace_deferred_error_interrupt(void)
/* APIC interrupt handler for deferred errors */
static void amd_deferred_error_interrupt(void)
{
- u64 status;
unsigned int bank;
+ u32 msr_status;
+ u64 status;

for (bank = 0; bank < mca_cfg.banks; ++bank) {
- rdmsrl(msr_ops.status(bank), status);
+ msr_status = (mce_flags.smca) ? MSR_AMD64_SMCA_MCx_DESTAT(bank)
+ : msr_ops.status(bank);
+
+ rdmsrl(msr_status, status);

if (!(status & MCI_STATUS_VAL) ||
!(status & MCI_STATUS_DEFERRED))
continue;

- __log_error(bank, false, 0);
+ __log_error(bank, true, false, 0);
break;
}
}
@@ -544,7 +560,7 @@ static void amd_threshold_interrupt(void)
return;

log:
- __log_error(bank, true, ((u64)high << 32) | low);
+ __log_error(bank, false, true, ((u64)high << 32) | low);
}

/*
--
2.7.3