[RFC][PATCH] x86, mpx: work around MPX erratum SKD046

From: Dave Hansen
Date: Mon May 02 2016 - 18:03:50 EST



From: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>

Big core processors (Xeon/Core) are affected by an MPX erratum.
This erratum can only be triggered when a system is not using
Supervisor Mode Execution Prevention (SMEP). To work around
this, we ensure that MPX can only be used in cases where SMEP is
present in the processor and enabled.

MPX and SMEP are present together in the *vast* majority of
cases, and the kernel does not generally execute code from pages
that are also accessible to userspace, so the real-world impact
of this issue is expected to be very limited.

Note that we don't have a good way to tell if we are on an
unaffected Atom processor or an affected Core/Xeon. The
solution for the moment is to just be conservative and assume
that everything is affected unless explicitly known to be
unaffected.
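
For the curious, the family/model that the new check keys off of
comes straight from CPUID leaf 1. A minimal user-space sketch to
print them (illustration only, not part of the patch; it assumes
gcc/clang's <cpuid.h> and skips extended-family handling, which
does not matter for family 6):

	#include <cpuid.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned int eax, ebx, ecx, edx;

		/* CPUID leaf 1: EAX holds family/model/stepping */
		__get_cpuid(1, &eax, &ebx, &ecx, &edx);
		printf("family: 0x%x model: 0x%x\n",
		       (eax >> 8) & 0xf,	/* base family */
		       ((eax >> 4) & 0xf) |	/* base model ... */
		       ((eax >> 12) & 0xf0));	/* ... plus extended model */
		return 0;
	}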

More details on the erratum:

http://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/desktop-6th-gen-core-family-spec-update.pdf

SKD046 Branch Instructions May Initialize MPX Bound Registers Incorrectly

Problem:

Depending on the current Intel MPX (Memory Protection
Extensions) configuration, execution of certain branch
instructions (near CALL, near RET, near JMP, and Jcc
instructions) without a BND prefix (F2H) initializes the MPX bound
registers. Due to this erratum, such a branch instruction that is
executed both with CPL = 3 and with CPL < 3 may not use the
correct MPX configuration register (BNDCFGU or BNDCFGS,
respectively) for determining whether to initialize the bound
registers; it may thus initialize the bound registers when it
should not, or fail to initialize them when it should.

Implication:

A branch instruction that has executed both in user mode and in
supervisor mode (from the same linear address) may cause a #BR
(bound range fault) when it should not have, or may not cause a
#BR when it should have.

Workaround:

An operating system can avoid this erratum by setting
CR4.SMEP[bit 20] to enable supervisor-mode execution prevention
(SMEP). When SMEP is enabled, no code can be executed both with
CPL = 3 and with CPL < 3.
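
(For reference: when SMEP is available the kernel already enables
it unconditionally at boot, so the workaround is in effect there
without further action. Roughly, paraphrased from setup_smep() in
arch/x86/kernel/cpu/common.c -- not part of this patch:

	static __always_inline void setup_smep(struct cpuinfo_x86 *c)
	{
		if (cpu_has(c, X86_FEATURE_SMEP))
			cr4_set_bits(X86_CR4_SMEP);	/* CR4.SMEP is bit 20 */
	}

The patch below therefore only has to handle CPUs where SMEP is
absent, or where it was turned off with "nosmep".)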

Signed-off-by: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Cc: x86 maintainers <x86@xxxxxxxxxx>
---

b/arch/x86/include/asm/bugs.h | 10 ++++++++--
b/arch/x86/kernel/cpu/common.c | 3 ++
b/arch/x86/kernel/cpu/intel.c | 58 +++++++++++++++++++++++++++++++++++++++++
3 files changed, 69 insertions(+), 2 deletions(-)
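
Note: should the conservative detection end up disabling MPX on a
CPU that is actually unaffected, the workaround can be skipped by
booting with the new parameter added below:

	intel-skd-046-workaround=disable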

diff -puN arch/x86/kernel/cpu/intel.c~skl-mpx-errata-exclude-atom arch/x86/kernel/cpu/intel.c
--- a/arch/x86/kernel/cpu/intel.c~skl-mpx-errata-exclude-atom 2016-05-02 14:02:50.133482253 -0700
+++ b/arch/x86/kernel/cpu/intel.c 2016-05-02 15:02:20.066199821 -0700
@@ -25,6 +25,62 @@
#include <asm/apic.h>
#endif

+/*
+ * Just in case our CPU detection goes bad, allow a way to
+ * override the disabling of MPX.
+ */
+static int forcempx;
+static int __init forcempx_setup(char *__unused)
+{
+	forcempx = 1;
+	return 1;
+}
+__setup("intel-skd-046-workaround=disable", forcempx_setup);
+
+/*
+ * x86_model values come from: SDM Vol. 3, Chapter 35
+ */
+static int is_mpx_affected_microarch(struct cpuinfo_x86 *c)
+{
+	/* Only family 6 is affected */
+	if (c->x86 != 0x6)
+		return 0;
+
+	/* We know these Atom models are unaffected, for sure */
+	switch (c->x86_model) {
+	case 0x5F: /* "Future Intel Atom ..." (Goldmont) */
+	case 0x5C: /* "Future Intel Atom ..." (Goldmont) */
+		return 0;
+	}
+	/*
+	 * We will get here on future unknown processors and all
+	 * Core/Xeons.  They might be unaffected Atoms or
+	 * affected Core/Xeons.  Be conservative and assume
+	 * the processor is affected.
+	 *
+	 * Once the complete list of affected Core/Xeon models is
+	 * known, it can be added here, and the Atom list removed.
+	 */
+	return 1;
+}
+
+void check_mpx_erratum(struct cpuinfo_x86 *c)
+{
+	if (forcempx)
+		return;
+	/*
+	 * Turn off the MPX feature on affected CPUs where SMEP
+	 * is not present or has been disabled.
+	 *
+	 * Works around Intel Erratum SKD046: "Branch Instructions
+	 * May Initialize MPX Bound Registers Incorrectly".
+	 */
+	if (is_mpx_affected_microarch(c) &&
+	    cpu_has(c, X86_FEATURE_MPX) &&
+	    !cpu_has(c, X86_FEATURE_SMEP))
+		setup_clear_cpu_cap(X86_FEATURE_MPX);
+}
+
static void early_init_intel(struct cpuinfo_x86 *c)
{
	u64 misc_enable;
@@ -173,6 +229,8 @@ static void early_init_intel(struct cpui
		if (edx & (1U << 28))
			c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff);
	}
+
+	check_mpx_erratum(c);
}

#ifdef CONFIG_X86_32
diff -puN arch/x86/kernel/cpu/common.c~skl-mpx-errata-exclude-atom arch/x86/kernel/cpu/common.c
--- a/arch/x86/kernel/cpu/common.c~skl-mpx-errata-exclude-atom 2016-05-02 14:02:50.136482389 -0700
+++ b/arch/x86/kernel/cpu/common.c 2016-05-02 14:02:50.157483342 -0700
@@ -37,6 +37,7 @@
#include <asm/mtrr.h>
#include <linux/numa.h>
#include <asm/asm.h>
+#include <asm/bugs.h>
#include <asm/cpu.h>
#include <asm/mce.h>
#include <asm/msr.h>
@@ -270,6 +271,8 @@ static inline void squash_the_stupid_ser
static __init int setup_disable_smep(char *arg)
{
	setup_clear_cpu_cap(X86_FEATURE_SMEP);
+	/* also check for things that depend on SMEP being enabled */
+	check_mpx_erratum(&boot_cpu_data);
	return 1;
}
__setup("nosmep", setup_disable_smep);
diff -puN arch/x86/include/asm/bugs.h~skl-mpx-errata-exclude-atom arch/x86/include/asm/bugs.h
--- a/arch/x86/include/asm/bugs.h~skl-mpx-errata-exclude-atom 2016-05-02 14:02:50.152483115 -0700
+++ b/arch/x86/include/asm/bugs.h 2016-05-02 14:02:50.156483296 -0700
@@ -3,10 +3,16 @@

extern void check_bugs(void);

-#if defined(CONFIG_CPU_SUP_INTEL) && defined(CONFIG_X86_32)
+#if defined(CONFIG_CPU_SUP_INTEL)
+void check_mpx_erratum(struct cpuinfo_x86 *c);
+#else
+static inline void check_mpx_erratum(struct cpuinfo_x86 *c) {}
+#endif /* CONFIG_CPU_SUP_INTEL */
+
+#if defined(CONFIG_CPU_SUP_INTEL) && defined(CONFIG_X86_32)
int ppro_with_ram_bug(void);
#else
static inline int ppro_with_ram_bug(void) { return 0; }
-#endif
+#endif /* CONFIG_CPU_SUP_INTEL && CONFIG_X86_32 */

#endif /* _ASM_X86_BUGS_H */
_