[PATCH V4 6/6] x86/entry/32: Introduce cpu_current_thread_sp0 to replace cpu_tss_rw.x86_tss.sp1

From: Lai Jiangshan
Date: Wed Feb 10 2021 - 07:43:11 EST


From: Lai Jiangshan <laijs@xxxxxxxxxxxxxxxxx>

On x86_32, TSS sp1 is not used by hardware; the kernel only uses it as
a copy of thread.sp0 for the entry code.

A plain percpu variable serves that purpose just as well, so introduce
cpu_current_thread_sp0 and use it instead.

Also remove the now-unneeded TSS_sp1 asm offset.
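
For reference, the non-Xen x86_32 half of update_task_stack() then
reduces to a plain percpu write. A minimal sketch (illustration only,
not the literal code; see the switch_to.h hunk below for the real
change, which keeps the XENPV branch):

	static inline void update_task_stack(struct task_struct *task)
	{
		/* Mirror thread.sp0 where the entry code can load it. */
		this_cpu_write(cpu_current_thread_sp0, task->thread.sp0);
	}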

Signed-off-by: Lai Jiangshan <laijs@xxxxxxxxxxxxxxxxx>
---
arch/x86/entry/entry_32.S | 6 +++---
arch/x86/include/asm/processor.h | 2 ++
arch/x86/include/asm/switch_to.h | 2 +-
arch/x86/kernel/asm-offsets.c | 1 -
arch/x86/kernel/cpu/common.c | 9 ++++++++-
arch/x86/kernel/process.c | 2 --
6 files changed, 14 insertions(+), 8 deletions(-)
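
Note (illustration, not part of the patch): the comment added to
common.c below relies on the two percpu variables differing by the
32-bit stack padding. Assuming the current TOP_OF_KERNEL_STACK_PADDING
definition in asm/thread_info.h (16 with CONFIG_VM86, 8 otherwise),
the invariant could be spelled out as a hypothetical helper,
check_stack_mirrors(), which is not in this patch:

	static inline void check_stack_mirrors(void)
	{
		unsigned long top = this_cpu_read(cpu_current_top_of_stack);
		unsigned long sp0 = this_cpu_read(cpu_current_thread_sp0);

		/* Holds on x86_32 after every update_task_stack(). */
		WARN_ON_ONCE(top - sp0 != TOP_OF_KERNEL_STACK_PADDING);
	}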

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index d5b5b43fd0c0..55dcf5c35141 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -472,7 +472,7 @@
movl %esp, %esi

/* Load top of task-stack into %edi */
- movl PER_CPU_VAR(cpu_tss_rw + TSS_sp1), %edi
+ movl PER_CPU_VAR(cpu_current_thread_sp0), %edi

/* Special case - entry from kernel mode via entry stack */
#ifdef CONFIG_VM86
@@ -658,7 +658,7 @@
movl PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %edi

/* Bytes on the task-stack to ecx */
- movl PER_CPU_VAR(cpu_tss_rw + TSS_sp1), %ecx
+ movl PER_CPU_VAR(cpu_current_thread_sp0), %ecx
subl %esi, %ecx

/* Allocate stack-frame on entry-stack */
@@ -916,7 +916,7 @@ SYM_FUNC_START(entry_SYSENTER_32)

/* Switch to task stack */
movl %esp, %eax
- movl PER_CPU_VAR(cpu_tss_rw + TSS_sp1), %esp
+ movl PER_CPU_VAR(cpu_current_thread_sp0), %esp

.Lsysenter_past_esp:
pushl $__USER_DS /* pt_regs->ss */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index e197de05d0aa..a40bade32105 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -776,6 +776,8 @@ static inline void spin_lock_prefetch(const void *x)

#define KSTK_ESP(task) (task_pt_regs(task)->sp)

+DECLARE_PER_CPU(unsigned long, cpu_current_thread_sp0);
+
#else
#define INIT_THREAD { }

diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index f0ba06bcba0b..eb0d3ae8a54d 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -69,7 +69,7 @@ static inline void update_task_stack(struct task_struct *task)
if (static_cpu_has(X86_FEATURE_XENPV))
load_sp0(task->thread.sp0);
else
- this_cpu_write(cpu_tss_rw.x86_tss.sp1, task->thread.sp0);
+ this_cpu_write(cpu_current_thread_sp0, task->thread.sp0);
#else
if (static_cpu_has(X86_FEATURE_XENPV))
load_sp0(task_top_of_stack(task));
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 60b9f42ce3c1..3b63b6062792 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -98,6 +98,5 @@ static void __used common(void)

/* Offset for fields in tss_struct */
OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
- OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
OFFSET(TSS_sp2, tss_struct, x86_tss.sp2);
}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 9c531ec73f5c..86485d55949e 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1792,12 +1792,19 @@ EXPORT_PER_CPU_SYMBOL(__preempt_count);
/*
* On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find
* the top of the kernel stack. Use an extra percpu variable to track the
- * top of the kernel stack directly.
+ * top of the kernel stack directly, and another percpu variable to
+ * track thread.sp0 for use in entry code. cpu_current_top_of_stack and
+ * cpu_current_thread_sp0 hold different values because of the non-zero
+ * stack padding on 32-bit. See the comments at TOP_OF_KERNEL_STACK_PADDING
+ * and in vm86 for more detail.
*/
DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) =
(unsigned long)&init_thread_union + THREAD_SIZE;
EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack);

+DEFINE_PER_CPU(unsigned long, cpu_current_thread_sp0) = TOP_OF_INIT_STACK;
+EXPORT_PER_CPU_SYMBOL_GPL(cpu_current_thread_sp0);
+
#ifdef CONFIG_STACKPROTECTOR
DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
#endif
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 296de77da4b2..e6d4b5399a81 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -64,8 +64,6 @@ __visible DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw) = {
.sp0 = (1UL << (BITS_PER_LONG-1)) + 1,

#ifdef CONFIG_X86_32
- .sp1 = TOP_OF_INIT_STACK,
-
.ss0 = __KERNEL_DS,
.ss1 = __KERNEL_CS,
#endif
--
2.19.1.6.gb485710b