Re: [PATCH v3 3/3] x86: Zap TOP_OF_KERNEL_STACK_PADDING on x86_64
From: Brian Gerst
Date: Wed Mar 19 2025 - 15:21:33 EST
On Wed, Mar 19, 2025 at 3:10 AM Xin Li (Intel) <xin@xxxxxxxxx> wrote:
>
> Because task_pt_regs() is now just an alias of thread_info.user_pt_regs,
> and no matter whether FRED is enabled or not a user level event frame on
> x86_64 is always pushed from top of current task kernel stack, i.e.,
> '(unsigned long)task_stack_page(task) + THREAD_SIZE', there is no meaning
> to keep TOP_OF_KERNEL_STACK_PADDING on x86_64, thus remove it.
>
> Signed-off-by: Xin Li (Intel) <xin@xxxxxxxxx>
> ---
>
> Change in v2:
> * Rebase on latest tip/master.
> ---
> arch/x86/include/asm/fred.h | 2 +-
> arch/x86/include/asm/processor.h | 6 ++++--
> arch/x86/include/asm/thread_info.h | 10 ----------
> arch/x86/kernel/process.c | 3 +--
> 4 files changed, 6 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/include/asm/fred.h b/arch/x86/include/asm/fred.h
> index 2a29e5216881..f9cca8c2c73e 100644
> --- a/arch/x86/include/asm/fred.h
> +++ b/arch/x86/include/asm/fred.h
> @@ -97,7 +97,7 @@ static __always_inline void fred_sync_rsp0(unsigned long rsp0)
>
> static __always_inline void fred_update_rsp0(void)
> {
> - unsigned long rsp0 = (unsigned long) task_stack_page(current) + THREAD_SIZE;
> + unsigned long rsp0 = task_top_of_stack(current);
>
> if (cpu_feature_enabled(X86_FEATURE_FRED) && (__this_cpu_read(fred_rsp0) != rsp0)) {
> wrmsrns(MSR_IA32_FRED_RSP0, rsp0);
> diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
> index a88ddf5614f2..3b7adefe05ab 100644
> --- a/arch/x86/include/asm/processor.h
> +++ b/arch/x86/include/asm/processor.h
> @@ -656,8 +656,6 @@ extern unsigned long __end_init_stack[];
> */
> #define TOP_OF_INIT_STACK ((unsigned long)&__end_init_stack)
>
> -#define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1))
> -
> /*
> * Note, this can't be converted to an inline function as this header
> * file defines 'struct thread_struct' which is used in the task_struct
> @@ -666,6 +664,9 @@ extern unsigned long __end_init_stack[];
> #define task_pt_regs(task) ((task)->thread_info.user_pt_regs)
>
> #ifdef CONFIG_X86_32
> +#define task_top_of_stack(task) ((unsigned long)task_stack_page(task) + THREAD_SIZE \
> + - TOP_OF_KERNEL_STACK_PADDING)
> +
> #define INIT_THREAD { \
> .sp0 = TOP_OF_INIT_STACK, \
> .sysenter_cs = __KERNEL_CS, \
> @@ -673,6 +674,7 @@ extern unsigned long __end_init_stack[];
>
> #else
> extern unsigned long __top_init_kernel_stack[];
> +#define task_top_of_stack(task) ((unsigned long)task_stack_page(task) + THREAD_SIZE)
>
> #define INIT_THREAD { \
> .sp = (unsigned long)&__top_init_kernel_stack, \
> diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
> index 915a6839bd61..d8ccca04b4d9 100644
> --- a/arch/x86/include/asm/thread_info.h
> +++ b/arch/x86/include/asm/thread_info.h
> @@ -30,10 +30,6 @@
> *
> * In vm86 mode, the hardware frame is much longer still, so add 16
> * bytes to make room for the real-mode segments.
> - *
> - * x86-64 has a fixed-length stack frame, but it depends on whether
> - * or not FRED is enabled. Future versions of FRED might make this
> - * dynamic, but for now it is always 2 words longer.
> */
> #ifdef CONFIG_X86_32
> # ifdef CONFIG_VM86
> @@ -41,12 +37,6 @@
> # else
> # define TOP_OF_KERNEL_STACK_PADDING 8
> # endif
> -#else /* x86-64 */
> -# ifdef CONFIG_X86_FRED
> -# define TOP_OF_KERNEL_STACK_PADDING (2 * 8)
> -# else
> -# define TOP_OF_KERNEL_STACK_PADDING 0
> -# endif
> #endif
>
> /*
> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
> index 58c1cd4ca60a..51020caac332 100644
> --- a/arch/x86/kernel/process.c
> +++ b/arch/x86/kernel/process.c
> @@ -124,9 +124,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
> */
> void arch_init_user_pt_regs(struct task_struct *tsk)
> {
> - unsigned long top_of_stack = (unsigned long)task_stack_page(tsk) + THREAD_SIZE;
> + unsigned long top_of_stack = task_top_of_stack(tsk);
>
> - top_of_stack -= TOP_OF_KERNEL_STACK_PADDING;
> tsk->thread_info.user_pt_regs = (struct pt_regs *)top_of_stack - 1;
> }
>
> --
> 2.48.1
>
I'm not sure it's worth fully removing TOP_OF_KERNEL_STACK_PADDING for
64-bit if it results in needing separate definitions of
task_top_of_stack(). Leaving it at zero is fine. The other changes
are fine though.
Brian Gerst