[tip:ras/core] x86/mce/AMD: Save MCA_IPID in MCE struct on SMCA systems

From: tip-bot for Yazen Ghannam
Date: Tue Sep 13 2016 - 09:32:44 EST


Commit-ID: 5828c46f2c07b97d758da6dc6afd5c374768d44d
Gitweb: http://git.kernel.org/tip/5828c46f2c07b97d758da6dc6afd5c374768d44d
Author: Yazen Ghannam <Yazen.Ghannam@xxxxxxx>
AuthorDate: Mon, 12 Sep 2016 09:59:37 +0200
Committer: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
CommitDate: Tue, 13 Sep 2016 15:23:12 +0200

x86/mce/AMD: Save MCA_IPID in MCE struct on SMCA systems

The MCA_IPID register uniquely identifies a bank's type and instance
on Scalable MCA systems. We should save the value of this register
in struct mce along with the other relevant error information. This
ensures that we can decode errors without relying on system software to
correlate the bank to the type.

Signed-off-by: Yazen Ghannam <Yazen.Ghannam@xxxxxxx>
Signed-off-by: Borislav Petkov <bp@xxxxxxx>
Link: http://lkml.kernel.org/r/1472680624-34221-1-git-send-email-Yazen.Ghannam@xxxxxxx
Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>

---
arch/x86/include/uapi/asm/mce.h | 1 +
arch/x86/kernel/cpu/mcheck/mce.c | 8 ++++++--
arch/x86/kernel/cpu/mcheck/mce_amd.c | 8 ++++++--
include/trace/events/mce.h | 5 ++++-
4 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h
index 8c75fbc..69a6e07 100644
--- a/arch/x86/include/uapi/asm/mce.h
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -27,6 +27,7 @@ struct mce {
__u32 apicid; /* CPU initial apic ID */
__u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */
__u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */
+ __u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */
};

#define MCE_GET_RECORD_LEN _IOR('M', 1, int)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 91a179b..17e9ff0 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -583,8 +583,12 @@ static void mce_read_aux(struct mce *m, int i)
}
}

- if (mce_flags.smca && (m->status & MCI_STATUS_SYNDV))
- m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i));
+ if (mce_flags.smca) {
+ m->ipid = mce_rdmsrl(MSR_AMD64_SMCA_MCx_IPID(i));
+
+ if (m->status & MCI_STATUS_SYNDV)
+ m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i));
+ }
}

static bool memory_error(struct mce *m)
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 16766e0..d2f92ab 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -564,8 +564,12 @@ __log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
if (m.status & MCI_STATUS_ADDRV)
rdmsrl(msr_addr, m.addr);

- if (mce_flags.smca && (m.status & MCI_STATUS_SYNDV))
- rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m.synd);
+ if (mce_flags.smca) {
+ rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m.ipid);
+
+ if (m.status & MCI_STATUS_SYNDV)
+ rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m.synd);
+ }

mce_log(&m);

diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h
index 8be5268..70f0214 100644
--- a/include/trace/events/mce.h
+++ b/include/trace/events/mce.h
@@ -21,6 +21,7 @@ TRACE_EVENT(mce_record,
__field( u64, addr )
__field( u64, misc )
__field( u64, synd )
+ __field( u64, ipid )
__field( u64, ip )
__field( u64, tsc )
__field( u64, walltime )
@@ -40,6 +41,7 @@ TRACE_EVENT(mce_record,
__entry->addr = m->addr;
__entry->misc = m->misc;
__entry->synd = m->synd;
+ __entry->ipid = m->ipid;
__entry->ip = m->ip;
__entry->tsc = m->tsc;
__entry->walltime = m->time;
@@ -52,10 +54,11 @@ TRACE_EVENT(mce_record,
__entry->cpuvendor = m->cpuvendor;
),

- TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, ADDR/MISC/SYND: %016Lx/%016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x",
+ TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, IPID: %016Lx, ADDR/MISC/SYND: %016Lx/%016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x",
__entry->cpu,
__entry->mcgcap, __entry->mcgstatus,
__entry->bank, __entry->status,
+ __entry->ipid,
__entry->addr, __entry->misc, __entry->synd,
__entry->cs, __entry->ip,
__entry->tsc,