[GIT pull] ras fixes for 4.16
From: Thomas Gleixner
Date: Sun Mar 11 2018 - 12:20:00 EST
Linus,
please pull the latest ras-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git ras-urgent-for-linus
Two small fixes for RAS/MCE:
- Serialize sysfs changes to avoid concurrent modificaiton of underlying data
- Add microcode revision to Machine Check records. This should have been
there forever, but now with the broken microcode versions in the wild
it has become important.
Thanks,
tglx
------------------>
Seunghun Han (1):
x86/MCE: Serialize sysfs changes
Tony Luck (1):
x86/MCE: Save microcode revision in machine check records
arch/x86/include/uapi/asm/mce.h | 1 +
arch/x86/kernel/cpu/mcheck/mce.c | 26 ++++++++++++++++++++++++--
2 files changed, 25 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h
index 91723461dc1f..435db58a7bad 100644
--- a/arch/x86/include/uapi/asm/mce.h
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -30,6 +30,7 @@ struct mce {
__u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */
__u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */
__u64 ppin; /* Protected Processor Inventory Number */
+ __u32 microcode;/* Microcode revision */
};
#define MCE_GET_RECORD_LEN _IOR('M', 1, int)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 8ff94d1e2dce..466f47301334 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -56,6 +56,9 @@
static DEFINE_MUTEX(mce_log_mutex);
+/* sysfs synchronization */
+static DEFINE_MUTEX(mce_sysfs_mutex);
+
#define CREATE_TRACE_POINTS
#include <trace/events/mce.h>
@@ -130,6 +133,8 @@ void mce_setup(struct mce *m)
if (this_cpu_has(X86_FEATURE_INTEL_PPIN))
rdmsrl(MSR_PPIN, m->ppin);
+
+ m->microcode = boot_cpu_data.microcode;
}
DEFINE_PER_CPU(struct mce, injectm);
@@ -262,7 +267,7 @@ static void __print_mce(struct mce *m)
*/
pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n",
m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid,
- cpu_data(m->extcpu).microcode);
+ m->microcode);
}
static void print_mce(struct mce *m)
@@ -2086,6 +2091,7 @@ static ssize_t set_ignore_ce(struct device *s,
if (kstrtou64(buf, 0, &new) < 0)
return -EINVAL;
+ mutex_lock(&mce_sysfs_mutex);
if (mca_cfg.ignore_ce ^ !!new) {
if (new) {
/* disable ce features */
@@ -2098,6 +2104,8 @@ static ssize_t set_ignore_ce(struct device *s,
on_each_cpu(mce_enable_ce, (void *)1, 1);
}
}
+ mutex_unlock(&mce_sysfs_mutex);
+
return size;
}
@@ -2110,6 +2118,7 @@ static ssize_t set_cmci_disabled(struct device *s,
if (kstrtou64(buf, 0, &new) < 0)
return -EINVAL;
+ mutex_lock(&mce_sysfs_mutex);
if (mca_cfg.cmci_disabled ^ !!new) {
if (new) {
/* disable cmci */
@@ -2121,6 +2130,8 @@ static ssize_t set_cmci_disabled(struct device *s,
on_each_cpu(mce_enable_ce, NULL, 1);
}
}
+ mutex_unlock(&mce_sysfs_mutex);
+
return size;
}
@@ -2128,8 +2139,19 @@ static ssize_t store_int_with_restart(struct device *s,
struct device_attribute *attr,
const char *buf, size_t size)
{
- ssize_t ret = device_store_int(s, attr, buf, size);
+ unsigned long old_check_interval = check_interval;
+ ssize_t ret = device_store_ulong(s, attr, buf, size);
+
+ if (check_interval == old_check_interval)
+ return ret;
+
+ if (check_interval < 1)
+ check_interval = 1;
+
+ mutex_lock(&mce_sysfs_mutex);
mce_restart();
+ mutex_unlock(&mce_sysfs_mutex);
+
return ret;
}