[PATCH 4.4 55/56] x86/speculation/l1tf: Increase l1tf memory limit for Nehalem+

From: Greg Kroah-Hartman
Date: Mon Sep 17 2018 - 18:52:03 EST


4.4-stable review patch. If anyone has any objections, please let me know.

------------------


From: Andi Kleen <ak@xxxxxxxxxxxxxxx>

[upstream cc51e5428ea54f575d49cfcede1d4cb3a72b4ec4 for 4.4.
Note there might be still a trivial conflict with the backport
for b0a182f875689647b014bc01d36b340217792852, but should
be easy to resolve]

On Nehalem and newer core CPUs the CPU cache internally uses 44 bits
physical address space. The L1TF workaround is limited by this internal
cache address width, and needs to have one bit free there for the
mitigation to work.

Older client systems report only 36bit physical address space so the range
check decides that L1TF is not mitigated for a 36bit phys/32GB system with
some memory holes.

But since these actually have the larger internal cache width this warning
is bogus because it would only really be needed if the system had more than
43bits of memory.

Add a new internal x86_cache_bits field. Normally it is the same as the
physical bits field reported by CPUID, but for Nehalem and newerforce it to
be at least 44bits.

Change the L1TF memory size warning to use the new cache_bits field to
avoid bogus warnings and remove the bogus comment about memory size.

Fixes: 17dbca119312 ("x86/speculation/l1tf: Add sysfs reporting for l1tf")
Reported-by: George Anchev <studio@xxxxxxxxxx>
Reported-by: Christopher Snowhill <kode54@xxxxxxxxx>
Signed-off-by: Andi Kleen <ak@xxxxxxxxxxxxxxx>
Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: x86@xxxxxxxxxx
Cc: linux-kernel@xxxxxxxxxxxxxxx
Cc: Michael Hocko <mhocko@xxxxxxxx>
Cc: vbabka@xxxxxxx
Cc: stable@xxxxxxxxxxxxxxx
Link: https://lkml.kernel.org/r/20180824170351.34874-1-andi@xxxxxxxxxxxxxx
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
---
arch/x86/include/asm/processor.h | 4 ++-
arch/x86/kernel/cpu/bugs.c | 47 ++++++++++++++++++++++++++++++++++-----
arch/x86/kernel/cpu/common.c | 2 +
3 files changed, 47 insertions(+), 6 deletions(-)

--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -104,6 +104,8 @@ struct cpuinfo_x86 {
__u8 x86_phys_bits;
/* CPUID returned core id bits: */
__u8 x86_coreid_bits;
+
+ __u8 x86_cache_bits;
/* Max extended CPUID function supported: */
__u32 extended_cpuid_level;
/* Maximum supported CPUID level, -1=no CPUID: */
@@ -174,7 +176,7 @@ extern void cpu_detect(struct cpuinfo_x8

static inline unsigned long long l1tf_pfn_limit(void)
{
- return BIT_ULL(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT);
+ return BIT_ULL(boot_cpu_data.x86_cache_bits - 1 - PAGE_SHIFT);
}

extern void early_cpu_init(void);
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -634,6 +634,46 @@ void x86_spec_ctrl_setup_ap(void)

#undef pr_fmt
#define pr_fmt(fmt) "L1TF: " fmt
+
+/*
+ * These CPUs all support 44bits physical address space internally in the
+ * cache but CPUID can report a smaller number of physical address bits.
+ *
+ * The L1TF mitigation uses the top most address bit for the inversion of
+ * non present PTEs. When the installed memory reaches into the top most
+ * address bit due to memory holes, which has been observed on machines
+ * which report 36bits physical address bits and have 32G RAM installed,
+ * then the mitigation range check in l1tf_select_mitigation() triggers.
+ * This is a false positive because the mitigation is still possible due to
+ * the fact that the cache uses 44bit internally. Use the cache bits
+ * instead of the reported physical bits and adjust them on the affected
+ * machines to 44bit if the reported bits are less than 44.
+ */
+static void override_cache_bits(struct cpuinfo_x86 *c)
+{
+ if (c->x86 != 6)
+ return;
+
+ switch (c->x86_model) {
+ case INTEL_FAM6_NEHALEM:
+ case INTEL_FAM6_WESTMERE:
+ case INTEL_FAM6_SANDYBRIDGE:
+ case INTEL_FAM6_IVYBRIDGE:
+ case INTEL_FAM6_HASWELL_CORE:
+ case INTEL_FAM6_HASWELL_ULT:
+ case INTEL_FAM6_HASWELL_GT3E:
+ case INTEL_FAM6_BROADWELL_CORE:
+ case INTEL_FAM6_BROADWELL_GT3E:
+ case INTEL_FAM6_SKYLAKE_MOBILE:
+ case INTEL_FAM6_SKYLAKE_DESKTOP:
+ case INTEL_FAM6_KABYLAKE_MOBILE:
+ case INTEL_FAM6_KABYLAKE_DESKTOP:
+ if (c->x86_cache_bits < 44)
+ c->x86_cache_bits = 44;
+ break;
+ }
+}
+
static void __init l1tf_select_mitigation(void)
{
u64 half_pa;
@@ -641,16 +681,13 @@ static void __init l1tf_select_mitigatio
if (!boot_cpu_has_bug(X86_BUG_L1TF))
return;

+ override_cache_bits(&boot_cpu_data);
+
#if CONFIG_PGTABLE_LEVELS == 2
pr_warn("Kernel not compiled for PAE. No mitigation for L1TF\n");
return;
#endif

- /*
- * This is extremely unlikely to happen because almost all
- * systems have far more MAX_PA/2 than RAM can be fit into
- * DIMM slots.
- */
half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT;
if (e820_any_mapped(half_pa, ULLONG_MAX - half_pa, E820_RAM)) {
pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n");
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -798,6 +798,8 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
c->x86_phys_bits = 36;
#endif

+ c->x86_cache_bits = c->x86_phys_bits;
+
if (c->extended_cpuid_level >= 0x8000000a)
c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);