[PATCH v2] x86/entry: Fix assumptions that the HW TSS is at the beginning of cpu_tss

From: Ingo Molnar
Date: Sun Nov 26 2017 - 08:48:56 EST



* Dave Hansen <dave.hansen@xxxxxxxxx> wrote:

> On 11/10/2017 08:05 PM, Andy Lutomirski wrote:
> > -struct tss_struct doublefault_tss __cacheline_aligned = {
> > - .x86_tss = {
> > - .sp0 = STACK_START,
> > - .ss0 = __KERNEL_DS,
> > - .ldt = 0,
> ...
> > +struct x86_hw_tss doublefault_tss __cacheline_aligned = {
> > + .sp0 = STACK_START,
> > + .ss0 = __KERNEL_DS,
> > + .ldt = 0,
> > + .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
>
> FWIW, I really like the trend of renaming the hardware structures in
> such a way that it's clear that they *are* hardware structures.
>
> It might also be nice to reference the relevant SDM sections on the
> topic, or even to include a comment along the lines of how it get used.
> This chunk from the SDM is particularly relevant:
>
> "The TSS holds information important to 64-bit mode and that is not
> directly related to the task-switch mechanism."

That makes sense - I've updated this patch with the following description added to
struct x86_hw_tss:

+/*
+ * Note that while the legacy 'TSS' name comes from 'Task State Segment',
+ * on modern x86 CPUs the TSS holds information important to 64-bit mode
+ * unrelated to the task-switch mechanism:
+ */

I have also added your Reviewed-by tag. Updated patch below.

Thanks,

Ingo

=====================>
Subject: x86/entry: Fix assumptions that the HW TSS is at the beginning of cpu_tss
From: Andy Lutomirski <luto@xxxxxxxxxx>
Date: Thu, 23 Nov 2017 20:32:52 -0800

A future patch will move SYSENTER_stack to the beginning of cpu_tss
to help detect overflow. Before this can happen, fix several code
paths that hardcode assumptions about the old layout.

Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxx>
Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
[ Updated the 'struct tss_struct' comments, as suggested by Dave Hansen. ]
Reviewed-by: Borislav Petkov <bp@xxxxxxx>
Reviewed-by: Dave Hansen <dave.hansen@xxxxxxxxx>
Reviewed-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Borislav Petkov <bpetkov@xxxxxxx>
Cc: Brian Gerst <brgerst@xxxxxxxxx>
Cc: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Link: https://lkml.kernel.org/r/d40a2c5ae4539d64090849a374f3169ec492f4e2.1511497875.git.luto@xxxxxxxxxx
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
---
arch/x86/include/asm/desc.h | 2 +-
arch/x86/include/asm/processor.h | 9 +++++++--
arch/x86/kernel/cpu/common.c | 8 ++++----
arch/x86/kernel/doublefault.c | 32 +++++++++++++++-----------------
arch/x86/power/cpu.c | 13 +++++++------
5 files changed, 34 insertions(+), 30 deletions(-)

Index: tip/arch/x86/include/asm/desc.h
===================================================================
--- tip.orig/arch/x86/include/asm/desc.h
+++ tip/arch/x86/include/asm/desc.h
@@ -178,7 +178,7 @@ static inline void set_tssldt_descriptor
#endif
}

-static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
+static inline void __set_tss_desc(unsigned cpu, unsigned int entry, struct x86_hw_tss *addr)
{
struct desc_struct *d = get_cpu_gdt_rw(cpu);
tss_desc tss;
Index: tip/arch/x86/include/asm/processor.h
===================================================================
--- tip.orig/arch/x86/include/asm/processor.h
+++ tip/arch/x86/include/asm/processor.h
@@ -163,7 +163,7 @@ enum cpuid_regs_idx {
extern struct cpuinfo_x86 boot_cpu_data;
extern struct cpuinfo_x86 new_cpu_data;

-extern struct tss_struct doublefault_tss;
+extern struct x86_hw_tss doublefault_tss;
extern __u32 cpu_caps_cleared[NCAPINTS];
extern __u32 cpu_caps_set[NCAPINTS];

@@ -253,6 +253,11 @@ static inline void load_cr3(pgd_t *pgdir
write_cr3(__sme_pa(pgdir));
}

+/*
+ * Note that while the legacy 'TSS' name comes from 'Task State Segment',
+ * on modern x86 CPUs the TSS also holds information important to 64-bit mode,
+ * unrelated to the task-switch mechanism:
+ */
#ifdef CONFIG_X86_32
/* This is the TSS defined by the hardware. */
struct x86_hw_tss {
@@ -323,7 +328,7 @@ struct x86_hw_tss {
#define IO_BITMAP_BITS 65536
#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
-#define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap)
+#define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
#define INVALID_IO_BITMAP_OFFSET 0x8000

struct tss_struct {
Index: tip/arch/x86/kernel/cpu/common.c
===================================================================
--- tip.orig/arch/x86/kernel/cpu/common.c
+++ tip/arch/x86/kernel/cpu/common.c
@@ -1582,7 +1582,7 @@ void cpu_init(void)
}
}

- t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+ t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;

/*
* <= is required because the CPU will access up to
@@ -1601,7 +1601,7 @@ void cpu_init(void)
* Initialize the TSS. Don't bother initializing sp0, as the initial
* task never enters user mode.
*/
- set_tss_desc(cpu, t);
+ set_tss_desc(cpu, &t->x86_tss);
load_TR_desc();

load_mm_ldt(&init_mm);
@@ -1659,12 +1659,12 @@ void cpu_init(void)
* Initialize the TSS. Don't bother initializing sp0, as the initial
* task never enters user mode.
*/
- set_tss_desc(cpu, t);
+ set_tss_desc(cpu, &t->x86_tss);
load_TR_desc();

load_mm_ldt(&init_mm);

- t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+ t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;

#ifdef CONFIG_DOUBLEFAULT
/* Set up doublefault TSS pointer in the GDT */
Index: tip/arch/x86/kernel/doublefault.c
===================================================================
--- tip.orig/arch/x86/kernel/doublefault.c
+++ tip/arch/x86/kernel/doublefault.c
@@ -50,25 +50,23 @@ static void doublefault_fn(void)
cpu_relax();
}

-struct tss_struct doublefault_tss __cacheline_aligned = {
- .x86_tss = {
- .sp0 = STACK_START,
- .ss0 = __KERNEL_DS,
- .ldt = 0,
- .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
+struct x86_hw_tss doublefault_tss __cacheline_aligned = {
+ .sp0 = STACK_START,
+ .ss0 = __KERNEL_DS,
+ .ldt = 0,
+ .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,

- .ip = (unsigned long) doublefault_fn,
- /* 0x2 bit is always set */
- .flags = X86_EFLAGS_SF | 0x2,
- .sp = STACK_START,
- .es = __USER_DS,
- .cs = __KERNEL_CS,
- .ss = __KERNEL_DS,
- .ds = __USER_DS,
- .fs = __KERNEL_PERCPU,
+ .ip = (unsigned long) doublefault_fn,
+ /* 0x2 bit is always set */
+ .flags = X86_EFLAGS_SF | 0x2,
+ .sp = STACK_START,
+ .es = __USER_DS,
+ .cs = __KERNEL_CS,
+ .ss = __KERNEL_DS,
+ .ds = __USER_DS,
+ .fs = __KERNEL_PERCPU,

- .__cr3 = __pa_nodebug(swapper_pg_dir),
- }
+ .__cr3 = __pa_nodebug(swapper_pg_dir),
};

/* dummy for do_double_fault() call */
Index: tip/arch/x86/power/cpu.c
===================================================================
--- tip.orig/arch/x86/power/cpu.c
+++ tip/arch/x86/power/cpu.c
@@ -165,12 +165,13 @@ static void fix_processor_context(void)
struct desc_struct *desc = get_cpu_gdt_rw(cpu);
tss_desc tss;
#endif
- set_tss_desc(cpu, t); /*
- * This just modifies memory; should not be
- * necessary. But... This is necessary, because
- * 386 hardware has concept of busy TSS or some
- * similar stupidity.
- */
+
+ /*
+ * This just modifies memory; should not be necessary. But... This is
+ * necessary, because 386 hardware has concept of busy TSS or some
+ * similar stupidity.
+ */
+ set_tss_desc(cpu, &t->x86_tss);

#ifdef CONFIG_X86_64
memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc));