[PATCH 1/2] x86/mce: Include the PPIN in machine check records when it is available

From: Luck, Tony
Date: Thu Nov 17 2016 - 19:36:39 EST


From: Tony Luck <tony.luck@xxxxxxxxx>

Intel Xeons from Ivy Bridge onwards support a Protected Processor
Inventory Number (PPIN), a per-socket processor identification number.
On systems that have it, include it in the machine check record.
I'm told that this would be helpful for users that run large data centers
with multi-socket servers to keep track of which CPUs are seeing errors.

Signed-off-by: Tony Luck <tony.luck@xxxxxxxxx>
---
arch/x86/include/asm/msr-index.h | 4 ++++
arch/x86/include/uapi/asm/mce.h | 1 +
arch/x86/kernel/cpu/mcheck/mce.c | 35 +++++++++++++++++++++++++++++++++++
3 files changed, 40 insertions(+)

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 78f3760ca1f2..710273c617b8 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -37,6 +37,10 @@
#define EFER_FFXSR (1<<_EFER_FFXSR)

/* Intel MSRs. Some also available on other CPUs */
+
+#define MSR_PPIN_CTL 0x0000004e
+#define MSR_PPIN 0x0000004f
+
#define MSR_IA32_PERFCTR0 0x000000c1
#define MSR_IA32_PERFCTR1 0x000000c2
#define MSR_FSB_FREQ 0x000000cd
diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h
index 69a6e07e3149..eb6247a7009b 100644
--- a/arch/x86/include/uapi/asm/mce.h
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -28,6 +28,7 @@ struct mce {
__u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */
__u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */
__u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */
+ __u64 ppin; /* Protected Processor Inventory Number */
};

#define MCE_GET_RECORD_LEN _IOR('M', 1, int)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index a7fdf453d895..eb9ce5023da3 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -43,6 +43,7 @@
#include <linux/export.h>
#include <linux/jump_label.h>

+#include <asm/intel-family.h>
#include <asm/processor.h>
#include <asm/traps.h>
#include <asm/tlbflush.h>
@@ -122,6 +123,9 @@ static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
*/
ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);

+/*
+ * Some Intel Xeons support per socket protected processor inventory number.
+ * Set once by mcheck_intel_ppin_init() when MSR_PPIN_CTL reports PPIN as
+ * enabled; mce_setup() only reads MSR_PPIN while this is set.
+ */
+static bool have_ppin;
+
/* Do initial initialization of a struct mce */
void mce_setup(struct mce *m)
{
@@ -135,6 +139,8 @@ void mce_setup(struct mce *m)
m->socketid = cpu_data(m->extcpu).phys_proc_id;
m->apicid = cpu_data(m->extcpu).initial_apicid;
rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap);
+ if (have_ppin)
+ rdmsrl(MSR_PPIN, m->ppin);
}

DEFINE_PER_CPU(struct mce, injectm);
@@ -2134,8 +2140,37 @@ static int __init mcheck_enable(char *str)
}
__setup("mce", mcheck_enable);

+/*
+ * Detect the Protected Processor Inventory Number (PPIN) on the Intel Xeon
+ * models listed below and set have_ppin when it is enabled.
+ *
+ * Only the low two bits of MSR_PPIN_CTL are examined: bit 0 locks the
+ * register against further writes and bit 1 enables PPIN. All other bits
+ * are masked off before comparing and are preserved when writing, so a
+ * non-zero value in them cannot cause an enabled PPIN to be overlooked.
+ */
+static void mcheck_intel_ppin_init(void)
+{
+	unsigned long long msr_ppin_ctl;
+
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+		return;
+	/* The INTEL_FAM6_* model numbers are only meaningful for family 6 */
+	if (boot_cpu_data.x86 != 6)
+		return;
+
+	switch (boot_cpu_data.x86_model) {
+	case INTEL_FAM6_IVYBRIDGE_X:
+	case INTEL_FAM6_HASWELL_X:
+	case INTEL_FAM6_BROADWELL_XEON_D:
+	case INTEL_FAM6_BROADWELL_X:
+	case INTEL_FAM6_SKYLAKE_X:
+		if (rdmsrl_safe(MSR_PPIN_CTL, &msr_ppin_ctl))
+			return;
+
+		/* Locked with the enable bit clear: nothing we can do */
+		if ((msr_ppin_ctl & 3) == 1) {
+			pr_info("PPIN available but disabled\n");
+			return;
+		}
+
+		/* if PPIN is disabled, but not locked, try to enable */
+		if (!(msr_ppin_ctl & 3)) {
+			wrmsrl_safe(MSR_PPIN_CTL, msr_ppin_ctl | 2);
+			/* Read back to confirm the enable bit actually stuck */
+			rdmsrl_safe(MSR_PPIN_CTL, &msr_ppin_ctl);
+		}
+
+		if ((msr_ppin_ctl & 3) == 2)
+			have_ppin = true;
+	}
+}
+
int __init mcheck_init(void)
{
+ mcheck_intel_ppin_init();
mcheck_intel_therm_init();
mce_register_decode_chain(&mce_srao_nb);
mcheck_vendor_init_severity();
--
2.7.4