[PATCH 11/15] x86/mce/AMD: Save MCA_IPID in MCE struct on SMCA systems

From: Borislav Petkov
Date: Mon Sep 12 2016 - 04:01:45 EST


From: Yazen Ghannam <Yazen.Ghannam@xxxxxxx>

The MCA_IPID register uniquely identifies a bank's type and instance
on Scalable MCA systems. We should save the value of this register
in struct mce along with the other relevant error information. This
ensures that we can decode errors without relying on system software to
correlate the bank to the type.

Signed-off-by: Yazen Ghannam <Yazen.Ghannam@xxxxxxx>
Cc: Aravind Gopalakrishnan <aravindksg.lkml@xxxxxxxxx>
Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
Cc: Tony Luck <tony.luck@xxxxxxxxx>
Cc: linux-edac <linux-edac@xxxxxxxxxxxxxxx>
Cc: x86-ml <x86@xxxxxxxxxx>
Link: http://lkml.kernel.org/r/1472680624-34221-1-git-send-email-Yazen.Ghannam@xxxxxxx
Signed-off-by: Borislav Petkov <bp@xxxxxxx>
---
arch/x86/include/uapi/asm/mce.h | 1 +
arch/x86/kernel/cpu/mcheck/mce.c | 8 ++++++--
arch/x86/kernel/cpu/mcheck/mce_amd.c | 8 ++++++--
include/trace/events/mce.h | 5 ++++-
4 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h
index 8c75fbc94c3f..69a6e07e3149 100644
--- a/arch/x86/include/uapi/asm/mce.h
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -27,6 +27,7 @@ struct mce {
__u32 apicid; /* CPU initial apic ID */
__u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */
__u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */
+ __u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */
};

#define MCE_GET_RECORD_LEN _IOR('M', 1, int)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 7f11ea5b75fa..84cb9b5859e2 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -582,8 +582,12 @@ static void mce_read_aux(struct mce *m, int i)
}
}

- if (mce_flags.smca && (m->status & MCI_STATUS_SYNDV))
- m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i));
+ if (mce_flags.smca) {
+ m->ipid = mce_rdmsrl(MSR_AMD64_SMCA_MCx_IPID(i));
+
+ if (m->status & MCI_STATUS_SYNDV)
+ m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i));
+ }
}

static bool memory_error(struct mce *m)
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 16766e09c2b7..d2f92ab5322f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -564,8 +564,12 @@ __log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
if (m.status & MCI_STATUS_ADDRV)
rdmsrl(msr_addr, m.addr);

- if (mce_flags.smca && (m.status & MCI_STATUS_SYNDV))
- rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m.synd);
+ if (mce_flags.smca) {
+ rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m.ipid);
+
+ if (m.status & MCI_STATUS_SYNDV)
+ rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m.synd);
+ }

mce_log(&m);

diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h
index 8be5268caf28..70f02149808c 100644
--- a/include/trace/events/mce.h
+++ b/include/trace/events/mce.h
@@ -21,6 +21,7 @@ TRACE_EVENT(mce_record,
__field( u64, addr )
__field( u64, misc )
__field( u64, synd )
+ __field( u64, ipid )
__field( u64, ip )
__field( u64, tsc )
__field( u64, walltime )
@@ -40,6 +41,7 @@ TRACE_EVENT(mce_record,
__entry->addr = m->addr;
__entry->misc = m->misc;
__entry->synd = m->synd;
+ __entry->ipid = m->ipid;
__entry->ip = m->ip;
__entry->tsc = m->tsc;
__entry->walltime = m->time;
@@ -52,10 +54,11 @@ TRACE_EVENT(mce_record,
__entry->cpuvendor = m->cpuvendor;
),

- TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, ADDR/MISC/SYND: %016Lx/%016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x",
+ TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, IPID: %016Lx, ADDR/MISC/SYND: %016Lx/%016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x",
__entry->cpu,
__entry->mcgcap, __entry->mcgstatus,
__entry->bank, __entry->status,
+ __entry->ipid,
__entry->addr, __entry->misc, __entry->synd,
__entry->cs, __entry->ip,
__entry->tsc,
--
2.10.0