Re: [RESEND PATCH v2 1/3] x86/fred: Allow variable-sized event frame

From: Brian Gerst
Date: Tue Mar 18 2025 - 09:55:01 EST


On Tue, Mar 18, 2025 at 3:20 AM Xin Li (Intel) <xin@xxxxxxxxx> wrote:
>
> A FRED event frame could contain different amount of information for
> different event types, or perhaps even for different instances of the
> same event type. Thus the size of an event frame pushed by a FRED CPU
> is not fixed and the address of the pt_regs structure that is used to
> save a user level context of current task is not at a fixed offset
> from top of current task kernel stack.
>
> Add a new field named 'user_pt_regs' in the thread_info structure to
> save the address of user level context pt_regs structure, thus to
> eliminate the need of any advance information of event frame size
> and allow a FRED CPU to push variable-sized event frame.
>
> For IDT user level event delivery, a pt_regs structure is pushed by
> hardware and software _always_ at a fixed offset from top of current
> task kernel stack, so simply initialize user_pt_regs to point to the
> pt_regs structure no matter whether one is pushed or not.
>
> While for FRED user level event delivery, user_pt_regs is updated with
> a pt_regs structure pointer generated in asm_fred_entrypoint_user().
>
> Suggested-by: H. Peter Anvin (Intel) <hpa@xxxxxxxxx>
> Signed-off-by: Xin Li (Intel) <xin@xxxxxxxxx>
> ---
> arch/x86/entry/entry_fred.c | 10 ++++++++++
> arch/x86/include/asm/processor.h | 12 ++++++------
> arch/x86/include/asm/thread_info.h | 9 ++++++---
> arch/x86/kernel/process.c | 22 ++++++++++++++++++++++
> include/linux/thread_info.h | 1 +
> kernel/fork.c | 6 ++++++
> 6 files changed, 51 insertions(+), 9 deletions(-)
>
> diff --git a/arch/x86/entry/entry_fred.c b/arch/x86/entry/entry_fred.c
> index f004a4dc74c2..a5f5bdd16ad8 100644
> --- a/arch/x86/entry/entry_fred.c
> +++ b/arch/x86/entry/entry_fred.c
> @@ -228,6 +228,16 @@ __visible noinstr void fred_entry_from_user(struct pt_regs *regs)
> /* Invalidate orig_ax so that syscall_get_nr() works correctly */
> regs->orig_ax = -1;
>
> + /*
> + * A FRED event frame could contain different amount of information
> + * for different event types, or perhaps even for different instances
> + * of the same event type. Thus the size of an event frame pushed by
> + * a FRED CPU is not fixed and the address of the pt_regs structure
> + * that is used to save a user level context of current task is not
> + * at a fixed offset from top of current task stack.
> + */
> + current->thread_info.user_pt_regs = regs;
> +
> switch (regs->fred_ss.type) {
> case EVENT_TYPE_EXTINT:
> return fred_extint(regs);
> diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
> index 5d2f7e5aff26..7ff3443eb57d 100644
> --- a/arch/x86/include/asm/processor.h
> +++ b/arch/x86/include/asm/processor.h
> @@ -646,12 +646,12 @@ static __always_inline void prefetchw(const void *x)
>
> #define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1))
>
> -#define task_pt_regs(task) \
> -({ \
> - unsigned long __ptr = (unsigned long)task_stack_page(task); \
> - __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \
> - ((struct pt_regs *)__ptr) - 1; \
> -})
> +/*
> + * Note, this can't be converted to an inline function as this header
> + * file defines 'struct thread_struct' which is used in the task_struct
> + * structure definition.
> + */
> +#define task_pt_regs(task) ((task)->thread_info.user_pt_regs)
>
> #ifdef CONFIG_X86_32
> #define INIT_THREAD { \
> diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
> index 9282465eea21..4372f171c65f 100644
> --- a/arch/x86/include/asm/thread_info.h
> +++ b/arch/x86/include/asm/thread_info.h
> @@ -56,6 +56,7 @@
> */
> #ifndef __ASSEMBLER__
> struct task_struct;
> +struct pt_regs;
> #include <asm/cpufeature.h>
> #include <linux/atomic.h>
>
> @@ -66,11 +67,13 @@ struct thread_info {
> #ifdef CONFIG_SMP
> u32 cpu; /* current CPU */
> #endif
> + struct pt_regs *user_pt_regs;
> };
>
> -#define INIT_THREAD_INFO(tsk) \
> -{ \
> - .flags = 0, \
> +#define INIT_THREAD_INFO(tsk) \
> +{ \
> + .flags = 0, \
> + .user_pt_regs = (struct pt_regs *)TOP_OF_INIT_STACK - 1, \

Use __top_init_kernel_stack here.

> }
>
> #else /* !__ASSEMBLER__ */
> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
> index 91f6ff618852..58c1cd4ca60a 100644
> --- a/arch/x86/kernel/process.c
> +++ b/arch/x86/kernel/process.c
> @@ -108,6 +108,28 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
> return 0;
> }
>
> +/*
> + * Initialize thread_info.user_pt_regs for IDT event delivery.
> + *
> + * For IDT user level event delivery, a pt_regs structure is pushed by both
> + * hardware and software and always resides at a fixed offset from top of
> + * current task kernel stack, thus thread_info.user_pt_regs is a per-task
> + * constant and NEVER changes after initialization.
> + *
> + * While for FRED user level event delivery, user_pt_regs is updated in
> + * fred_entry_from_user() immediately after user level event delivery.
> + *
> + * Note: thread_info.user_pt_regs of the init task is initialized at build
> + * time.
> + */
> +void arch_init_user_pt_regs(struct task_struct *tsk)
> +{
> + unsigned long top_of_stack = (unsigned long)task_stack_page(tsk) + THREAD_SIZE;
> +
> + top_of_stack -= TOP_OF_KERNEL_STACK_PADDING;
> + tsk->thread_info.user_pt_regs = (struct pt_regs *)top_of_stack - 1;
> +}

Can this be put into arch_dup_task_struct() instead of creating another hook?


Brian Gerst