[PATCH 02/15] x86/mce: Add support for new MCA_SYND register

From: Borislav Petkov
Date: Mon Sep 12 2016 - 04:01:27 EST


From: Yazen Ghannam <Yazen.Ghannam@xxxxxxx>

Syndrome information is no longer contained in MCA_STATUS for SMCA
systems but in a new register - MCA_SYND.

Add a synd field to struct mce to hold MCA_SYND register value. Add it
to the end of struct mce to maintain compatibility with old versions of
mcelog. Also, add it to the respective tracepoint.

Signed-off-by: Yazen Ghannam <Yazen.Ghannam@xxxxxxx>
Cc: Aravind Gopalakrishnan <aravindksg.lkml@xxxxxxxxx>
Cc: Ashok Raj <ashok.raj@xxxxxxxxx>
Cc: linux-edac <linux-edac@xxxxxxxxxxxxxxx>
Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
Cc: Tony Luck <tony.luck@xxxxxxxxx>
Cc: x86-ml <x86@xxxxxxxxxx>
Link: http://lkml.kernel.org/r/1467633035-32080-1-git-send-email-Yazen.Ghannam@xxxxxxx
Signed-off-by: Borislav Petkov <bp@xxxxxxx>
---
arch/x86/include/asm/mce.h | 5 ++++-
arch/x86/include/uapi/asm/mce.h | 1 +
arch/x86/kernel/cpu/mcheck/mce.c | 4 ++++
arch/x86/kernel/cpu/mcheck/mce_amd.c | 3 +++
include/trace/events/mce.h | 6 ++++--
5 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 8bf766ef0e18..21bc5a3a4c89 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -40,9 +40,10 @@
#define MCI_STATUS_AR (1ULL<<55) /* Action required */

/* AMD-specific bits */
+#define MCI_STATUS_TCC (1ULL<<55) /* Task context corrupt */
+#define MCI_STATUS_SYNDV (1ULL<<53) /* synd reg. valid */
#define MCI_STATUS_DEFERRED (1ULL<<44) /* uncorrected error, deferred exception */
#define MCI_STATUS_POISON (1ULL<<43) /* access poisonous data */
-#define MCI_STATUS_TCC (1ULL<<55) /* Task context corrupt */

/*
* McaX field if set indicates a given bank supports MCA extensions:
@@ -110,6 +111,7 @@
#define MSR_AMD64_SMCA_MC0_MISC0 0xc0002003
#define MSR_AMD64_SMCA_MC0_CONFIG 0xc0002004
#define MSR_AMD64_SMCA_MC0_IPID 0xc0002005
+#define MSR_AMD64_SMCA_MC0_SYND 0xc0002006
#define MSR_AMD64_SMCA_MC0_DESTAT 0xc0002008
#define MSR_AMD64_SMCA_MC0_DEADDR 0xc0002009
#define MSR_AMD64_SMCA_MC0_MISC1 0xc000200a
@@ -119,6 +121,7 @@
#define MSR_AMD64_SMCA_MCx_MISC(x) (MSR_AMD64_SMCA_MC0_MISC0 + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_CONFIG(x) (MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_IPID(x) (MSR_AMD64_SMCA_MC0_IPID + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_SYND(x) (MSR_AMD64_SMCA_MC0_SYND + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_DESTAT(x) (MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_DEADDR(x) (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x)))
diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h
index 2184943341bf..8c75fbc94c3f 100644
--- a/arch/x86/include/uapi/asm/mce.h
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -26,6 +26,7 @@ struct mce {
__u32 socketid; /* CPU socket ID */
__u32 apicid; /* CPU initial apic ID */
__u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */
+ __u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */
};

#define MCE_GET_RECORD_LEN _IOR('M', 1, int)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 79d8ec849468..7f11ea5b75fa 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -568,6 +568,7 @@ static void mce_read_aux(struct mce *m, int i)
{
if (m->status & MCI_STATUS_MISCV)
m->misc = mce_rdmsrl(msr_ops.misc(i));
+
if (m->status & MCI_STATUS_ADDRV) {
m->addr = mce_rdmsrl(msr_ops.addr(i));

@@ -580,6 +581,9 @@ static void mce_read_aux(struct mce *m, int i)
m->addr <<= shift;
}
}
+
+ if (mce_flags.smca && (m->status & MCI_STATUS_SYNDV))
+ m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i));
}

static bool memory_error(struct mce *m)
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 78b7681f7f66..419e0ee3b12f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -479,6 +479,9 @@ __log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
if (m.status & MCI_STATUS_ADDRV)
rdmsrl(msr_addr, m.addr);

+ if (mce_flags.smca && (m.status & MCI_STATUS_SYNDV))
+ rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m.synd);
+
mce_log(&m);

wrmsrl(msr_status, 0);
diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h
index 4cbbcef6baa8..8be5268caf28 100644
--- a/include/trace/events/mce.h
+++ b/include/trace/events/mce.h
@@ -20,6 +20,7 @@ TRACE_EVENT(mce_record,
__field( u64, status )
__field( u64, addr )
__field( u64, misc )
+ __field( u64, synd )
__field( u64, ip )
__field( u64, tsc )
__field( u64, walltime )
@@ -38,6 +39,7 @@ TRACE_EVENT(mce_record,
__entry->status = m->status;
__entry->addr = m->addr;
__entry->misc = m->misc;
+ __entry->synd = m->synd;
__entry->ip = m->ip;
__entry->tsc = m->tsc;
__entry->walltime = m->time;
@@ -50,11 +52,11 @@ TRACE_EVENT(mce_record,
__entry->cpuvendor = m->cpuvendor;
),

- TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, ADDR/MISC: %016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x",
+ TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, ADDR/MISC/SYND: %016Lx/%016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x",
__entry->cpu,
__entry->mcgcap, __entry->mcgstatus,
__entry->bank, __entry->status,
- __entry->addr, __entry->misc,
+ __entry->addr, __entry->misc, __entry->synd,
__entry->cs, __entry->ip,
__entry->tsc,
__entry->cpuvendor, __entry->cpuid,
--
2.10.0