[tip:ras/core] x86/mce: Add support for new MCA_SYND register
From: tip-bot for Yazen Ghannam
Date: Tue Sep 13 2016 - 09:28:50 EST
Commit-ID: db819d60f6720080150a365080ff656cf239f88f
Gitweb: http://git.kernel.org/tip/db819d60f6720080150a365080ff656cf239f88f
Author: Yazen Ghannam <Yazen.Ghannam@xxxxxxx>
AuthorDate: Mon, 12 Sep 2016 09:59:28 +0200
Committer: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
CommitDate: Tue, 13 Sep 2016 15:23:06 +0200
x86/mce: Add support for new MCA_SYND register
Syndrome information is no longer contained in MCA_STATUS for SMCA
systems but in a new register - MCA_SYND.
Add a synd field to struct mce to hold MCA_SYND register value. Add it
to the end of struct mce to maintain compatibility with old versions of
mcelog. Also, add it to the respective tracepoint.
Signed-off-by: Yazen Ghannam <Yazen.Ghannam@xxxxxxx>
Signed-off-by: Borislav Petkov <bp@xxxxxxx>
Link: http://lkml.kernel.org/r/1467633035-32080-1-git-send-email-Yazen.Ghannam@xxxxxxx
Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
---
arch/x86/include/asm/mce.h | 5 ++++-
arch/x86/include/uapi/asm/mce.h | 1 +
arch/x86/kernel/cpu/mcheck/mce.c | 4 ++++
arch/x86/kernel/cpu/mcheck/mce_amd.c | 3 +++
include/trace/events/mce.h | 6 ++++--
5 files changed, 16 insertions(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 8bf766e..21bc5a3 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -40,9 +40,10 @@
#define MCI_STATUS_AR (1ULL<<55) /* Action required */
/* AMD-specific bits */
+#define MCI_STATUS_TCC (1ULL<<55) /* Task context corrupt */
+#define MCI_STATUS_SYNDV (1ULL<<53) /* synd reg. valid */
#define MCI_STATUS_DEFERRED (1ULL<<44) /* uncorrected error, deferred exception */
#define MCI_STATUS_POISON (1ULL<<43) /* access poisonous data */
-#define MCI_STATUS_TCC (1ULL<<55) /* Task context corrupt */
/*
* McaX field if set indicates a given bank supports MCA extensions:
@@ -110,6 +111,7 @@
#define MSR_AMD64_SMCA_MC0_MISC0 0xc0002003
#define MSR_AMD64_SMCA_MC0_CONFIG 0xc0002004
#define MSR_AMD64_SMCA_MC0_IPID 0xc0002005
+#define MSR_AMD64_SMCA_MC0_SYND 0xc0002006
#define MSR_AMD64_SMCA_MC0_DESTAT 0xc0002008
#define MSR_AMD64_SMCA_MC0_DEADDR 0xc0002009
#define MSR_AMD64_SMCA_MC0_MISC1 0xc000200a
@@ -119,6 +121,7 @@
#define MSR_AMD64_SMCA_MCx_MISC(x) (MSR_AMD64_SMCA_MC0_MISC0 + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_CONFIG(x) (MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_IPID(x) (MSR_AMD64_SMCA_MC0_IPID + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_SYND(x) (MSR_AMD64_SMCA_MC0_SYND + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_DESTAT(x) (MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_DEADDR(x) (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x)))
diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h
index 2184943..8c75fbc 100644
--- a/arch/x86/include/uapi/asm/mce.h
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -26,6 +26,7 @@ struct mce {
__u32 socketid; /* CPU socket ID */
__u32 apicid; /* CPU initial apic ID */
__u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */
+ __u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */
};
#define MCE_GET_RECORD_LEN _IOR('M', 1, int)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 7f3f0e1..91a179b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -569,6 +569,7 @@ static void mce_read_aux(struct mce *m, int i)
{
if (m->status & MCI_STATUS_MISCV)
m->misc = mce_rdmsrl(msr_ops.misc(i));
+
if (m->status & MCI_STATUS_ADDRV) {
m->addr = mce_rdmsrl(msr_ops.addr(i));
@@ -581,6 +582,9 @@ static void mce_read_aux(struct mce *m, int i)
m->addr <<= shift;
}
}
+
+ if (mce_flags.smca && (m->status & MCI_STATUS_SYNDV))
+ m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i));
}
static bool memory_error(struct mce *m)
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 78b7681..419e0ee 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -479,6 +479,9 @@ __log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
if (m.status & MCI_STATUS_ADDRV)
rdmsrl(msr_addr, m.addr);
+ if (mce_flags.smca && (m.status & MCI_STATUS_SYNDV))
+ rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m.synd);
+
mce_log(&m);
wrmsrl(msr_status, 0);
diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h
index 4cbbcef..8be5268 100644
--- a/include/trace/events/mce.h
+++ b/include/trace/events/mce.h
@@ -20,6 +20,7 @@ TRACE_EVENT(mce_record,
__field( u64, status )
__field( u64, addr )
__field( u64, misc )
+ __field( u64, synd )
__field( u64, ip )
__field( u64, tsc )
__field( u64, walltime )
@@ -38,6 +39,7 @@ TRACE_EVENT(mce_record,
__entry->status = m->status;
__entry->addr = m->addr;
__entry->misc = m->misc;
+ __entry->synd = m->synd;
__entry->ip = m->ip;
__entry->tsc = m->tsc;
__entry->walltime = m->time;
@@ -50,11 +52,11 @@ TRACE_EVENT(mce_record,
__entry->cpuvendor = m->cpuvendor;
),
- TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, ADDR/MISC: %016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x",
+ TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, ADDR/MISC/SYND: %016Lx/%016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x",
__entry->cpu,
__entry->mcgcap, __entry->mcgstatus,
__entry->bank, __entry->status,
- __entry->addr, __entry->misc,
+ __entry->addr, __entry->misc, __entry->synd,
__entry->cs, __entry->ip,
__entry->tsc,
__entry->cpuvendor, __entry->cpuid,