[tip:x86/asm] x86/asm/entry: Replace this_cpu_sp0() with current_top_of_stack() and fix it on x86_32

From: tip-bot for Andy Lutomirski
Date: Sat Mar 07 2015 - 03:38:26 EST


Commit-ID: a7fcf28d431ef70afaa91496e64e16dc51dccec4
Gitweb: http://git.kernel.org/tip/a7fcf28d431ef70afaa91496e64e16dc51dccec4
Author: Andy Lutomirski <luto@xxxxxxxxxxxxxx>
AuthorDate: Fri, 6 Mar 2015 17:50:19 -0800
Committer: Ingo Molnar <mingo@xxxxxxxxxx>
CommitDate: Sat, 7 Mar 2015 09:34:03 +0100

x86/asm/entry: Replace this_cpu_sp0() with current_top_of_stack() and fix it on x86_32

I broke 32-bit kernels. The implementation of sp0 was correct
as far as I can tell, but sp0 was much weirder on x86_32 than I
realized. It has the following issues:

- Init's sp0 is inconsistent with everything else's: non-init tasks
are offset by 8 bytes. (I have no idea why, and the comment is unhelpful.)

- vm86 does crazy things to sp0.

Fix it up by replacing this_cpu_sp0() with
current_top_of_stack() and using a new percpu variable to track
the top of the stack on x86_32.

Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxxxx>
Cc: Denys Vlasenko <dvlasenk@xxxxxxxxxx>
Cc: H. Peter Anvin <hpa@xxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Oleg Nesterov <oleg@xxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Fixes: 75182b1632a8 ("x86/asm/entry: Switch all C consumers of kernel_stack to this_cpu_sp0()")
Link: http://lkml.kernel.org/r/d09dbe270883433776e0cbee3c7079433349e96d.1425692936.git.luto@xxxxxxxxxxxxxx
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
---
arch/x86/include/asm/processor.h | 11 ++++++++++-
arch/x86/include/asm/thread_info.h | 4 +---
arch/x86/kernel/cpu/common.c | 13 +++++++++++--
arch/x86/kernel/process_32.c | 11 +++++++----
arch/x86/kernel/smpboot.c | 2 ++
arch/x86/kernel/traps.c | 4 ++--
6 files changed, 33 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index f5e3ec6..48a61c1 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -284,6 +284,10 @@ struct tss_struct {

DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);

+#ifdef CONFIG_X86_32
+DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
+#endif
+
/*
* Save the original ist values for checking stack pointers during debugging
*/
@@ -564,9 +568,14 @@ static inline void native_swapgs(void)
#endif
}

-static inline unsigned long this_cpu_sp0(void)
+static inline unsigned long current_top_of_stack(void)
{
+#ifdef CONFIG_X86_64
return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
+#else
+ /* sp0 on x86_32 is special in and around vm86 mode. */
+ return this_cpu_read_stable(cpu_current_top_of_stack);
+#endif
}

#ifdef CONFIG_PARAVIRT
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index a2fa189..7740edd 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -158,9 +158,7 @@ DECLARE_PER_CPU(unsigned long, kernel_stack);

static inline struct thread_info *current_thread_info(void)
{
- struct thread_info *ti;
- ti = (void *)(this_cpu_sp0() - THREAD_SIZE);
- return ti;
+ return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE);
}

static inline unsigned long current_stack_pointer(void)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 5d0f0cc..7634833 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1130,8 +1130,8 @@ DEFINE_PER_CPU_FIRST(union irq_stack_union,
irq_stack_union) __aligned(PAGE_SIZE) __visible;

/*
- * The following four percpu variables are hot. Align current_task to
- * cacheline size such that all four fall in the same cacheline.
+ * The following percpu variables are hot. Align current_task to
+ * cacheline size such that they fall in the same cacheline.
*/
DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
&init_task;
@@ -1226,6 +1226,15 @@ DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
EXPORT_PER_CPU_SYMBOL(__preempt_count);
DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);

+/*
+ * On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find
+ * the top of the kernel stack. Use an extra percpu variable to track the
+ * top of the kernel stack directly.
+ */
+DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) =
+ (unsigned long)&init_thread_union + THREAD_SIZE;
+EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack);
+
#ifdef CONFIG_CC_STACKPROTECTOR
DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
#endif
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 0405cab..1b9963f 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -306,13 +306,16 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
arch_end_context_switch(next_p);

/*
- * Reload esp0. This changes current_thread_info().
+ * Reload esp0, kernel_stack, and current_top_of_stack. This changes
+ * current_thread_info().
*/
load_sp0(tss, next);
-
this_cpu_write(kernel_stack,
- (unsigned long)task_stack_page(next_p) +
- THREAD_SIZE - KERNEL_STACK_OFFSET);
+ (unsigned long)task_stack_page(next_p) +
+ THREAD_SIZE - KERNEL_STACK_OFFSET);
+ this_cpu_write(cpu_current_top_of_stack,
+ (unsigned long)task_stack_page(next_p) +
+ THREAD_SIZE);

/*
* Restore %gs if needed (which is common)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index febc6aa..759388c 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -806,6 +806,8 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
#ifdef CONFIG_X86_32
/* Stack for startup_32 can be just as for start_secondary onwards */
irq_ctx_init(cpu);
+ per_cpu(cpu_current_top_of_stack, cpu) =
+ (unsigned long)task_stack_page(idle) + THREAD_SIZE;
#else
clear_tsk_thread_flag(idle, TIF_FORK);
initial_gs = per_cpu_offset(cpu);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index fa29058..081252c 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -174,8 +174,8 @@ void ist_begin_non_atomic(struct pt_regs *regs)
* will catch asm bugs and any attempt to use ist_preempt_enable
* from double_fault.
*/
- BUG_ON((unsigned long)(this_cpu_sp0() - current_stack_pointer()) >=
- THREAD_SIZE);
+ BUG_ON((unsigned long)(current_top_of_stack() -
+ current_stack_pointer()) >= THREAD_SIZE);

preempt_count_sub(HARDIRQ_OFFSET);
}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/