[GIT PULL] x86/cpufeature changes for v4.11
From: Ingo Molnar
Date: Mon Feb 20 2017 - 07:09:54 EST
Linus,
Please pull the latest x86-cpufeature-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-cpufeature-for-linus
# HEAD: 3bba73b1b7a88d88c3ea16b7914c13d475e4a87b x86/cpufeature: Move RING3MWAIT feature to avoid conflicts
The main changes in this cycle were related to enable ring-3 MONITOR/MWAIT
instructions support on supported CPUs, by Grzegorz Andrejczuk and Piotr Luc.
Thanks,
Ingo
------------------>
Grzegorz Andrejczuk (4):
x86/msr: Add MSR_MISC_FEATURE_ENABLES and RING3MWAIT bit
x86/elf: Add HWCAP2 to expose ring 3 MONITOR/MWAIT
x86/cpufeature: Add RING3MWAIT to CPU features
x86/cpufeature: Enable RING3MWAIT for Knights Landing
Piotr Luc (2):
x86/cpufeature: Add AVX512_VPOPCNTDQ feature
x86/cpufeature: Enable RING3MWAIT for Knights Mill
Thomas Gleixner (1):
x86/cpufeature: Move RING3MWAIT feature to avoid conflicts
Documentation/admin-guide/kernel-parameters.txt | 4 +++
arch/x86/include/asm/cpufeatures.h | 4 +--
arch/x86/include/asm/elf.h | 9 +++++
arch/x86/include/asm/msr-index.h | 5 +++
arch/x86/include/uapi/asm/hwcap2.h | 7 ++++
arch/x86/kernel/cpu/common.c | 3 ++
arch/x86/kernel/cpu/intel.c | 44 +++++++++++++++++++++++++
arch/x86/kernel/fpu/xstate.c | 1 +
tools/arch/x86/include/asm/cpufeatures.h | 1 +
9 files changed, 76 insertions(+), 2 deletions(-)
create mode 100644 arch/x86/include/uapi/asm/hwcap2.h
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index be7c0d9506b1..cfbb3fc938f7 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3563,6 +3563,10 @@
rhash_entries= [KNL,NET]
Set number of hash buckets for route cache
+ ring3mwait=disable
+ [KNL] Disable ring 3 MONITOR/MWAIT feature on supported
+ CPUs.
+
ro [KNL] Mount root device read-only on boot
rodata= [KNL]
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index eafee3161d1c..43c4ea9cd907 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -186,7 +186,7 @@
*
* Reuse free bits when adding new feature flags!
*/
-
+#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */
#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
@@ -288,6 +288,7 @@
#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
+#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
#define X86_FEATURE_RDPID (16*32+ 22) /* RDPID instruction */
/* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
@@ -320,5 +321,4 @@
#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
-
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index e7f155c3045e..9d49c18b5ea9 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -258,6 +258,15 @@ extern int force_personality32;
#define ELF_HWCAP (boot_cpu_data.x86_capability[CPUID_1_EDX])
+extern u32 elf_hwcap2;
+
+/*
+ * HWCAP2 supplies mask with kernel enabled CPU features, so that
+ * the application can discover that it can safely use them.
+ * The bits are defined in uapi/asm/hwcap2.h.
+ */
+#define ELF_HWCAP2 (elf_hwcap2)
+
/* This yields a string that ld.so will use to load implementation
specific libraries for optimization. This is more specific in
intent than poking at uname or /proc/cpuinfo.
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 710273c617b8..00293a94ffaf 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -543,6 +543,11 @@
#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT 39
#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT)
+/* MISC_FEATURE_ENABLES non-architectural features */
+#define MSR_MISC_FEATURE_ENABLES 0x00000140
+
+#define MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT 1
+
#define MSR_IA32_TSC_DEADLINE 0x000006E0
/* P4/Xeon+ specific */
diff --git a/arch/x86/include/uapi/asm/hwcap2.h b/arch/x86/include/uapi/asm/hwcap2.h
new file mode 100644
index 000000000000..0bd2be5c7617
--- /dev/null
+++ b/arch/x86/include/uapi/asm/hwcap2.h
@@ -0,0 +1,7 @@
+#ifndef _ASM_X86_HWCAP2_H
+#define _ASM_X86_HWCAP2_H
+
+/* MONITOR/MWAIT enabled in Ring 3 */
+#define HWCAP2_RING3MWAIT (1 << 0)
+
+#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 9bab7a8a4293..f879429cfcaa 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -35,6 +35,7 @@
#include <asm/desc.h>
#include <asm/fpu/internal.h>
#include <asm/mtrr.h>
+#include <asm/hwcap2.h>
#include <linux/numa.h>
#include <asm/asm.h>
#include <asm/bugs.h>
@@ -51,6 +52,8 @@
#include "cpu.h"
+u32 elf_hwcap2 __read_mostly;
+
/* all of these masks are initialized in setup_cpu_local_masks() */
cpumask_var_t cpu_initialized_mask;
cpumask_var_t cpu_callout_mask;
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 203f860d2ab3..a4c4ff9b27e4 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -15,6 +15,8 @@
#include <asm/cpu.h>
#include <asm/intel-family.h>
#include <asm/microcode_intel.h>
+#include <asm/hwcap2.h>
+#include <asm/elf.h>
#ifdef CONFIG_X86_64
#include <linux/topology.h>
@@ -62,6 +64,46 @@ void check_mpx_erratum(struct cpuinfo_x86 *c)
}
}
+static bool ring3mwait_disabled __read_mostly;
+
+static int __init ring3mwait_disable(char *__unused)
+{
+ ring3mwait_disabled = true;
+ return 0;
+}
+__setup("ring3mwait=disable", ring3mwait_disable);
+
+static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c)
+{
+ /*
+ * Ring 3 MONITOR/MWAIT feature cannot be detected without
+ * cpu model and family comparison.
+ */
+ if (c->x86 != 6)
+ return;
+ switch (c->x86_model) {
+ case INTEL_FAM6_XEON_PHI_KNL:
+ case INTEL_FAM6_XEON_PHI_KNM:
+ break;
+ default:
+ return;
+ }
+
+ if (ring3mwait_disabled) {
+ msr_clear_bit(MSR_MISC_FEATURE_ENABLES,
+ MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT);
+ return;
+ }
+
+ msr_set_bit(MSR_MISC_FEATURE_ENABLES,
+ MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT);
+
+ set_cpu_cap(c, X86_FEATURE_RING3MWAIT);
+
+ if (c == &boot_cpu_data)
+ ELF_HWCAP2 |= HWCAP2_RING3MWAIT;
+}
+
static void early_init_intel(struct cpuinfo_x86 *c)
{
u64 misc_enable;
@@ -560,6 +602,8 @@ static void init_intel(struct cpuinfo_x86 *c)
detect_vmx_virtcap(c);
init_intel_energy_perf(c);
+
+ probe_xeon_phi_r3mwait(c);
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 1d7770447b3e..35f7024aace5 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -78,6 +78,7 @@ void fpu__xstate_clear_all_cpu_caps(void)
setup_clear_cpu_cap(X86_FEATURE_PKU);
setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);
+ setup_clear_cpu_cap(X86_FEATURE_AVX512_VPOPCNTDQ);
}
/*
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index cddd5d06e1cb..3603556fa0d9 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -280,6 +280,7 @@
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
+#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
/* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */