[patch 11/60] x86/entry: Fix assumptions that the HW TSS is at the beginning of cpu_tss

From: Thomas Gleixner
Date: Mon Dec 04 2017 - 12:03:11 EST


From: Andy Lutomirski <luto@xxxxxxxxxx>

A future patch will move SYSENTER_stack to the beginning of cpu_tss
to help detect overflow. Before this can happen, fix several code
paths that hardcode assumptions about the old layout.

Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxx>
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Reviewed-by: Borislav Petkov <bp@xxxxxxx>
Reviewed-by: Dave Hansen <dave.hansen@xxxxxxxxx>
Reviewed-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Rik van Riel <riel@xxxxxxxxxx>
Cc: Denys Vlasenko <dvlasenk@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Brian Gerst <brgerst@xxxxxxxxx>
Cc: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Link: https://lkml.kernel.org/r/d40a2c5ae4539d64090849a374f3169ec492f4e2.1511497875.git.luto@xxxxxxxxxx

---
arch/x86/include/asm/desc.h | 2 +-
arch/x86/include/asm/processor.h | 9 +++++++--
arch/x86/kernel/cpu/common.c | 8 ++++----
arch/x86/kernel/doublefault.c | 32 +++++++++++++++-----------------
arch/x86/kvm/vmx.c | 2 +-
arch/x86/power/cpu.c | 13 +++++++------
6 files changed, 35 insertions(+), 31 deletions(-)

--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -178,7 +178,7 @@ static inline void set_tssldt_descriptor
#endif
}

-static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
+static inline void __set_tss_desc(unsigned cpu, unsigned int entry, struct x86_hw_tss *addr)
{
struct desc_struct *d = get_cpu_gdt_rw(cpu);
tss_desc tss;
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -163,7 +163,7 @@ enum cpuid_regs_idx {
extern struct cpuinfo_x86 boot_cpu_data;
extern struct cpuinfo_x86 new_cpu_data;

-extern struct tss_struct doublefault_tss;
+extern struct x86_hw_tss doublefault_tss;
extern __u32 cpu_caps_cleared[NCAPINTS];
extern __u32 cpu_caps_set[NCAPINTS];

@@ -253,6 +253,11 @@ static inline void load_cr3(pgd_t *pgdir
write_cr3(__sme_pa(pgdir));
}

+/*
+ * Note that while the legacy 'TSS' name comes from 'Task State Segment',
+ * on modern x86 CPUs the TSS also holds information important to 64-bit mode,
+ * unrelated to the task-switch mechanism:
+ */
#ifdef CONFIG_X86_32
/* This is the TSS defined by the hardware. */
struct x86_hw_tss {
@@ -323,7 +328,7 @@ struct x86_hw_tss {
#define IO_BITMAP_BITS 65536
#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
-#define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap)
+#define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
#define INVALID_IO_BITMAP_OFFSET 0x8000

struct tss_struct {
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1582,7 +1582,7 @@ void cpu_init(void)
}
}

- t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+ t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;

/*
* <= is required because the CPU will access up to
@@ -1601,7 +1601,7 @@ void cpu_init(void)
* Initialize the TSS. Don't bother initializing sp0, as the initial
* task never enters user mode.
*/
- set_tss_desc(cpu, t);
+ set_tss_desc(cpu, &t->x86_tss);
load_TR_desc();

load_mm_ldt(&init_mm);
@@ -1659,12 +1659,12 @@ void cpu_init(void)
* Initialize the TSS. Don't bother initializing sp0, as the initial
* task never enters user mode.
*/
- set_tss_desc(cpu, t);
+ set_tss_desc(cpu, &t->x86_tss);
load_TR_desc();

load_mm_ldt(&init_mm);

- t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+ t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;

#ifdef CONFIG_DOUBLEFAULT
/* Set up doublefault TSS pointer in the GDT */
--- a/arch/x86/kernel/doublefault.c
+++ b/arch/x86/kernel/doublefault.c
@@ -50,25 +50,23 @@ static void doublefault_fn(void)
cpu_relax();
}

-struct tss_struct doublefault_tss __cacheline_aligned = {
- .x86_tss = {
- .sp0 = STACK_START,
- .ss0 = __KERNEL_DS,
- .ldt = 0,
- .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
+struct x86_hw_tss doublefault_tss __cacheline_aligned = {
+ .sp0 = STACK_START,
+ .ss0 = __KERNEL_DS,
+ .ldt = 0,
+ .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,

- .ip = (unsigned long) doublefault_fn,
- /* 0x2 bit is always set */
- .flags = X86_EFLAGS_SF | 0x2,
- .sp = STACK_START,
- .es = __USER_DS,
- .cs = __KERNEL_CS,
- .ss = __KERNEL_DS,
- .ds = __USER_DS,
- .fs = __KERNEL_PERCPU,
+ .ip = (unsigned long) doublefault_fn,
+ /* 0x2 bit is always set */
+ .flags = X86_EFLAGS_SF | 0x2,
+ .sp = STACK_START,
+ .es = __USER_DS,
+ .cs = __KERNEL_CS,
+ .ss = __KERNEL_DS,
+ .ds = __USER_DS,
+ .fs = __KERNEL_PERCPU,

- .__cr3 = __pa_nodebug(swapper_pg_dir),
- }
+ .__cr3 = __pa_nodebug(swapper_pg_dir),
};

/* dummy for do_double_fault() call */
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2291,7 +2291,7 @@ static void vmx_vcpu_load(struct kvm_vcp
* processors. See 22.2.4.
*/
vmcs_writel(HOST_TR_BASE,
- (unsigned long)this_cpu_ptr(&cpu_tss));
+ (unsigned long)this_cpu_ptr(&cpu_tss.x86_tss));
vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */

/*
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -165,12 +165,13 @@ static void fix_processor_context(void)
struct desc_struct *desc = get_cpu_gdt_rw(cpu);
tss_desc tss;
#endif
- set_tss_desc(cpu, t); /*
- * This just modifies memory; should not be
- * necessary. But... This is necessary, because
- * 386 hardware has concept of busy TSS or some
- * similar stupidity.
- */
+
+ /*
+ * This just modifies memory; should not be necessary. But... This is
+ * necessary, because 386 hardware has concept of busy TSS or some
+ * similar stupidity.
+ */
+ set_tss_desc(cpu, &t->x86_tss);

#ifdef CONFIG_X86_64
memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc));