[GIT pull] x86 fixes for 4.15

From: Thomas Gleixner
Date: Sun Jan 28 2018 - 05:37:20 EST


Linus,

please pull the latest x86-urgent-for-linus git tree from:

git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

A set of small fixes for 4.15:

- Fix vmapped stack synchronization on systems with 4-level paging and a
large amount of memory. Missing handling of the 5-level folding made the
pgd synchronization logic fail, causing double faults.

- Add a missing sanity check in the vmalloc_fault() logic on 5-level
paging systems.

- Bring back the protection against accessing a freed initrd in the
microcode loader, which was lost through an incorrect merge conflict
resolution.

- Extend the Broadwell microcode late loading sanity check with an LLC
size check; see the note after this list for the threshold arithmetic.

- Add a missing ENDPROC annotation in ftrace assembly code which makes ORC
unhappy.

- Prevent loading the AMD power module on !AMD platforms. The load itself
is harmless, but an unload attempt results in a kernel crash.

- Update Peter Anvin's role in the MAINTAINERS file.
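
Note on the Broadwell item: the threshold used by the new LLC size check
is 2.5MB per core expressed in bytes, i.e. 2.5 * 1024 * 1024 = 2621440,
which is the constant that appears in the intel.c hunk below.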

Thanks,

tglx

------------------>
Andy Lutomirski (2):
x86/mm/64: Fix vmapped stack syncing on very-large-memory 4-level systems
x86/mm/64: Tighten up vmalloc_fault() sanity checks on 5-level kernels

Borislav Petkov (1):
x86/microcode: Fix again accessing initrd after having been freed

H. Peter Anvin (1):
x86: Mark hpa as a "Designated Reviewer" for the time being

Jia Zhang (1):
x86/microcode/intel: Extend BDW late-loading further with LLC size check

Josh Poimboeuf (1):
x86/ftrace: Add one more ENDPROC annotation

Xiao Liang (1):
perf/x86/amd/power: Do not load AMD power module on !AMD platforms


MAINTAINERS | 12 +-----------
arch/x86/events/amd/power.c | 2 +-
arch/x86/kernel/cpu/microcode/core.c | 2 +-
arch/x86/kernel/cpu/microcode/intel.c | 20 ++++++++++++++++++--
arch/x86/kernel/ftrace_64.S | 2 +-
arch/x86/mm/fault.c | 22 +++++++++-------------
arch/x86/mm/tlb.c | 34 +++++++++++++++++++++++++++++-----
7 files changed, 60 insertions(+), 34 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index e3581413420c..94976349ff61 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6609,16 +6609,6 @@ L: linux-i2c@xxxxxxxxxxxxxxx
S: Maintained
F: drivers/i2c/i2c-stub.c

-i386 BOOT CODE
-M: "H. Peter Anvin" <hpa@xxxxxxxxx>
-S: Maintained
-F: arch/x86/boot/
-
-i386 SETUP CODE / CPU ERRATA WORKAROUNDS
-M: "H. Peter Anvin" <hpa@xxxxxxxxx>
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/hpa/linux-2.6-x86setup.git
-S: Maintained
-
IA64 (Itanium) PLATFORM
M: Tony Luck <tony.luck@xxxxxxxxx>
M: Fenghua Yu <fenghua.yu@xxxxxxxxx>
@@ -14858,7 +14848,7 @@ F: net/x25/
X86 ARCHITECTURE (32-BIT AND 64-BIT)
M: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
M: Ingo Molnar <mingo@xxxxxxxxxx>
-M: "H. Peter Anvin" <hpa@xxxxxxxxx>
+R: "H. Peter Anvin" <hpa@xxxxxxxxx>
M: x86@xxxxxxxxxx
L: linux-kernel@xxxxxxxxxxxxxxx
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/core
diff --git a/arch/x86/events/amd/power.c b/arch/x86/events/amd/power.c
index a6eee5ac4f58..2aefacf5c5b2 100644
--- a/arch/x86/events/amd/power.c
+++ b/arch/x86/events/amd/power.c
@@ -277,7 +277,7 @@ static int __init amd_power_pmu_init(void)
int ret;

if (!x86_match_cpu(cpu_match))
- return 0;
+ return -ENODEV;

if (!boot_cpu_has(X86_FEATURE_ACC_POWER))
return -ENODEV;
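
Context for the one-liner above: a module whose init function returns an
error is never loaded, so its exit function can never run. Returning 0 on
!AMD meant the module loaded without registering the PMU, and the unload
path then tried to unregister a PMU that was never registered. A
simplified sketch of the pairing (illustrative, not the driver's exact
code; the cpuhp setup/teardown is omitted):

	static int __init amd_power_pmu_init(void)
	{
		if (!x86_match_cpu(cpu_match))
			return -ENODEV;	/* abort the load on !AMD */

		return perf_pmu_register(&pmu_class, "power", -1);
	}

	static void __exit amd_power_pmu_exit(void)
	{
		/*
		 * Runs only if init succeeded. Before the fix it could
		 * run without a prior perf_pmu_register() and crashed
		 * here.
		 */
		perf_pmu_unregister(&pmu_class);
	}
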
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index c4fa4a85d4cb..e4fc595cd6ea 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -239,7 +239,7 @@ static int __init save_microcode_in_initrd(void)
break;
case X86_VENDOR_AMD:
if (c->x86 >= 0x10)
- return save_microcode_in_initrd_amd(cpuid_eax(1));
+ ret = save_microcode_in_initrd_amd(cpuid_eax(1));
break;
default:
break;
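
The point of funneling the AMD case through ret instead of returning
directly: the early return skipped the tail of save_microcode_in_initrd(),
which marks the initrd as gone so that later microcode loading paths do
not touch the already freed initrd memory. A sketch of the function's
tail, assuming it matches the upstream core.c:

	switch (c->x86_vendor) {
	...
	}

	/* The early return above used to skip this marker. */
	initrd_gone = true;

	return ret;
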
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index d9e460fc7a3b..f7c55b0e753a 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -45,6 +45,9 @@ static const char ucode_path[] = "kernel/x86/microcode/GenuineIntel.bin";
/* Current microcode patch used in early patching on the APs. */
static struct microcode_intel *intel_ucode_patch;

+/* last level cache size per core */
+static int llc_size_per_core;
+
static inline bool cpu_signatures_match(unsigned int s1, unsigned int p1,
unsigned int s2, unsigned int p2)
{
@@ -912,12 +915,14 @@ static bool is_blacklisted(unsigned int cpu)

/*
* Late loading on model 79 with microcode revision less than 0x0b000021
- * may result in a system hang. This behavior is documented in item
- * BDF90, #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family).
+ * and LLC size per core bigger than 2.5MB may result in a system hang.
+ * This behavior is documented in item BDF90, #334165 (Intel Xeon
+ * Processor E7-8800/4800 v4 Product Family).
*/
if (c->x86 == 6 &&
c->x86_model == INTEL_FAM6_BROADWELL_X &&
c->x86_mask == 0x01 &&
+ llc_size_per_core > 2621440 &&
c->microcode < 0x0b000021) {
pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
@@ -975,6 +980,15 @@ static struct microcode_ops microcode_intel_ops = {
.apply_microcode = apply_microcode_intel,
};

+static int __init calc_llc_size_per_core(struct cpuinfo_x86 *c)
+{
+ u64 llc_size = c->x86_cache_size * 1024;
+
+ do_div(llc_size, c->x86_max_cores);
+
+ return (int)llc_size;
+}
+
struct microcode_ops * __init init_intel_microcode(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
@@ -985,5 +999,7 @@ struct microcode_ops * __init init_intel_microcode(void)
return NULL;
}

+ llc_size_per_core = calc_llc_size_per_core(c);
+
return &microcode_intel_ops;
}
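
To make the units explicit: x86_cache_size is in KB, so the helper
converts to bytes before dividing by the core count. The same arithmetic
as a standalone userspace sketch (the example values are made up, and the
other blacklist conditions are ignored here):

	#include <stdio.h>

	/* Mirrors calc_llc_size_per_core(): cache size is in KB. */
	static int llc_bytes_per_core(unsigned long long cache_size_kb,
				      unsigned int cores)
	{
		return (int)(cache_size_kb * 1024 / cores);
	}

	int main(void)
	{
		/* 60MB LLC shared by 4 active cores: 15728640 > 2621440,
		   so late loading would be disabled. */
		printf("%d\n", llc_bytes_per_core(60 * 1024, 4));

		/* Exactly 2.5MB per core: 2621440, not greater, so late
		   loading stays allowed. */
		printf("%d\n", llc_bytes_per_core(2560, 1));
		return 0;
	}
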
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index 7cb8ba08beb9..8774fd2ed390 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -291,7 +291,7 @@ trace:
restore_mcount_regs

jmp fgraph_trace
-END(function_hook)
+ENDPROC(function_hook)
#endif /* CONFIG_DYNAMIC_FTRACE */

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
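
Why ORC cares: ENDPROC additionally emits a ".type name, @function"
directive, which objtool needs to recognize the symbol as a proper
function and generate ORC unwind data for it. Approximately, from
include/linux/linkage.h of this era:

	#define END(name) \
		.size name, .-name

	#define ENDPROC(name) \
		.type name, @function ASM_NL \
		END(name)
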
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index b3e40773dce0..800de815519c 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -439,18 +439,13 @@ static noinline int vmalloc_fault(unsigned long address)
if (pgd_none(*pgd_ref))
return -1;

- if (pgd_none(*pgd)) {
- set_pgd(pgd, *pgd_ref);
- arch_flush_lazy_mmu_mode();
- } else if (CONFIG_PGTABLE_LEVELS > 4) {
- /*
- * With folded p4d, pgd_none() is always false, so the pgd may
- * point to an empty page table entry and pgd_page_vaddr()
- * will return garbage.
- *
- * We will do the correct sanity check on the p4d level.
- */
- BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
+ if (CONFIG_PGTABLE_LEVELS > 4) {
+ if (pgd_none(*pgd)) {
+ set_pgd(pgd, *pgd_ref);
+ arch_flush_lazy_mmu_mode();
+ } else {
+ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
+ }
}

/* With 4-level paging, copying happens on the p4d level. */
@@ -459,7 +454,7 @@ static noinline int vmalloc_fault(unsigned long address)
if (p4d_none(*p4d_ref))
return -1;

- if (p4d_none(*p4d)) {
+ if (p4d_none(*p4d) && CONFIG_PGTABLE_LEVELS == 4) {
set_p4d(p4d, *p4d_ref);
arch_flush_lazy_mmu_mode();
} else {
@@ -470,6 +465,7 @@ static noinline int vmalloc_fault(unsigned long address)
* Below here mismatches are bugs because these lower tables
* are shared:
*/
+ BUILD_BUG_ON(CONFIG_PGTABLE_LEVELS < 4);

pud = pud_offset(p4d, address);
pud_ref = pud_offset(p4d_ref, address);
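
For background on the CONFIG_PGTABLE_LEVELS > 4 guard: with the p4d
folded (4-level kernels) the generic nop4d helpers turn the pgd level
into a pass-through, roughly (from include/asm-generic/pgtable-nop4d.h):

	static inline int pgd_none(pgd_t pgd)		{ return 0; }
	static inline int pgd_present(pgd_t pgd)	{ return 1; }

	static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
	{
		return (p4d_t *)pgd;
	}

So on a 4-level kernel pgd_none() is compile-time false and the real
check has to happen one level down at the p4d, which is what the code
above and the tlb.c change below now do consistently.
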
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index a1561957dccb..5bfe61a5e8e3 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -151,6 +151,34 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
local_irq_restore(flags);
}

+static void sync_current_stack_to_mm(struct mm_struct *mm)
+{
+ unsigned long sp = current_stack_pointer;
+ pgd_t *pgd = pgd_offset(mm, sp);
+
+ if (CONFIG_PGTABLE_LEVELS > 4) {
+ if (unlikely(pgd_none(*pgd))) {
+ pgd_t *pgd_ref = pgd_offset_k(sp);
+
+ set_pgd(pgd, *pgd_ref);
+ }
+ } else {
+ /*
+ * "pgd" is faked. The top level entries are "p4d"s, so sync
+ * the p4d. This compiles to approximately the same code as
+ * the 5-level case.
+ */
+ p4d_t *p4d = p4d_offset(pgd, sp);
+
+ if (unlikely(p4d_none(*p4d))) {
+ pgd_t *pgd_ref = pgd_offset_k(sp);
+ p4d_t *p4d_ref = p4d_offset(pgd_ref, sp);
+
+ set_p4d(p4d, *p4d_ref);
+ }
+ }
+}
+
void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
@@ -226,11 +254,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
* mapped in the new pgd, we'll double-fault. Forcibly
* map it.
*/
- unsigned int index = pgd_index(current_stack_pointer);
- pgd_t *pgd = next->pgd + index;
-
- if (unlikely(pgd_none(*pgd)))
- set_pgd(pgd, init_mm.pgd[index]);
+ sync_current_stack_to_mm(next);
}

/* Stop remote flushes for the previous mm */
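
Note on why the removed open-coded version never worked on 4-level
kernels: given the folded-p4d helpers quoted above, pgd_none() is
compile-time 0, so in the removed lines

	unsigned int index = pgd_index(current_stack_pointer);
	pgd_t *pgd = next->pgd + index;

	if (unlikely(pgd_none(*pgd)))	/* always false with folded p4d */
		set_pgd(pgd, init_mm.pgd[index]);

the set_pgd() was unreachable and the top-level entry covering the stack
was never copied, which is how the double faults on very large 4-level
systems came about. The new helper does the check at the p4d level, where
p4d_none() actually inspects the entry.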