[PATCH 3/3, v4] x86/fpu: Remove init_task FPU state dependencies, add debugging warning for PF_KTHREAD tasks

From: Ingo Molnar
Date: Thu Jun 06 2024 - 04:31:13 EST



* Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> wrote:

> On Wed, 5 Jun 2024 at 09:27, Oleg Nesterov <oleg@xxxxxxxxxx> wrote:
> >
> > Yes, but kernel_fpu_begin() never does save_fpregs_to_fpstate() if
> > current->flags & PF_KTHREAD ?
>
> Ahh, and init_thread does have PF_KTHREAD.
>
> Ok, looks fine to me, except I think the commit message should be cleared up.
>
> The whole sentence about
>
> "But the init task isn't supposed to be using the FPU in any case ..."
>
> is just simply not true.
>
> It should be more along the lines of "kernel threads don't need an FPU
> save area, because their FPU use is not preemptible or reentrant and
> they don't return to user space".

Indeed - I fixed & clarified the changelog accordingly - see the new patch
further below.

I changed the debug check to test for PF_KTHREAD, and to return NULL:

+#ifdef CONFIG_X86_DEBUG_FPU
+struct fpu *x86_task_fpu(struct task_struct *task)
+{
+ if (WARN_ON_ONCE(task->flags & PF_KTHREAD))
+ return NULL;
+
+ return (void *)task + sizeof(*task);
+}
+#endif

... and the NULL we return will likely crash & exit any kthreads attempting
to use the FPU context area - which I think is preferable to returning
invalid memory that may then be corrupted.

Hopefully this remains a hypothethical concern. :-)

Alternatively, this may be one of the very few cases where a BUG_ON() might
be justified? This condition is not recoverable in any sane fashion IMO.

Thanks,

Ingo

============================>
From: Ingo Molnar <mingo@xxxxxxxxxx>
Date: Wed, 5 Jun 2024 10:35:57 +0200
Subject: [PATCH] x86/fpu: Remove init_task FPU state dependencies, add debugging warning for PF_KTHREAD tasks

init_task's FPU state initialization was a bit of a hack:

__x86_init_fpu_begin = .;
. = __x86_init_fpu_begin + 128*PAGE_SIZE;
__x86_init_fpu_end = .;

But the init task isn't supposed to be using the FPU context
in any case, so remove the hack and add in some debug warnings.

As Linus noted in the discussion, the init task (and other
PF_KTHREAD tasks) *can* use the FPU via kernel_fpu_begin()/_end(),
but they don't need the context area because their FPU use is not
preemptible or reentrant, and they don't return to user-space.

Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Andy Lutomirski <luto@xxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxxxx>
Cc: Fenghua Yu <fenghua.yu@xxxxxxxxx>
Cc: H. Peter Anvin <hpa@xxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Oleg Nesterov <oleg@xxxxxxxxxx>
Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Uros Bizjak <ubizjak@xxxxxxxxx>
Link: https://lore.kernel.org/r/20240605083557.2051480-4-mingo@xxxxxxxxxx
---
arch/x86/include/asm/processor.h | 6 +++++-
arch/x86/kernel/fpu/core.c | 13 ++++++++++---
arch/x86/kernel/fpu/init.c | 5 ++---
arch/x86/kernel/fpu/xstate.c | 3 ---
arch/x86/kernel/vmlinux.lds.S | 4 ----
5 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 249c5fa20de4..ed8981866f4d 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -504,7 +504,11 @@ struct thread_struct {
#endif
};

-#define x86_task_fpu(task) ((struct fpu *)((void *)task + sizeof(*task)))
+#ifdef CONFIG_X86_DEBUG_FPU
+extern struct fpu *x86_task_fpu(struct task_struct *task);
+#else
+# define x86_task_fpu(task) ((struct fpu *)((void *)task + sizeof(*task)))
+#endif

/*
* X86 doesn't need any embedded-FPU-struct quirks:
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 0ccabcd3bf62..d9ff8ca5b32d 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -51,6 +51,16 @@ static DEFINE_PER_CPU(bool, in_kernel_fpu);
*/
DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);

+#ifdef CONFIG_X86_DEBUG_FPU
+struct fpu *x86_task_fpu(struct task_struct *task)
+{
+ if (WARN_ON_ONCE(task->flags & PF_KTHREAD))
+ return NULL;
+
+ return (void *)task + sizeof(*task);
+}
+#endif
+
/*
* Can we use the FPU in kernel mode with the
* whole "kernel_fpu_begin/end()" sequence?
@@ -591,10 +601,8 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal,
* This is safe because task_struct size is a multiple of cacheline size.
*/
struct fpu *dst_fpu = (void *)dst + sizeof(*dst);
- struct fpu *src_fpu = x86_task_fpu(current);

BUILD_BUG_ON(sizeof(*dst) % SMP_CACHE_BYTES != 0);
- BUG_ON(!src_fpu);

/* The new task's FPU state cannot be valid in the hardware. */
dst_fpu->last_cpu = -1;
@@ -657,7 +665,6 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal,
if (update_fpu_shstk(dst, ssp))
return 1;

- trace_x86_fpu_copy_src(src_fpu);
trace_x86_fpu_copy_dst(dst_fpu);

return 0;
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 11aa31410df2..53580e59e5db 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -38,7 +38,7 @@ static void fpu__init_cpu_generic(void)
/* Flush out any pending x87 state: */
#ifdef CONFIG_MATH_EMULATION
if (!boot_cpu_has(X86_FEATURE_FPU))
- fpstate_init_soft(&x86_task_fpu(current)->fpstate->regs.soft);
+ ;
else
#endif
asm volatile ("fninit");
@@ -164,7 +164,7 @@ static void __init fpu__init_task_struct_size(void)
* Subtract off the static size of the register state.
* It potentially has a bunch of padding.
*/
- task_size -= sizeof(x86_task_fpu(current)->__fpstate.regs);
+ task_size -= sizeof(union fpregs_state);

/*
* Add back the dynamically-calculated register state
@@ -209,7 +209,6 @@ static void __init fpu__init_system_xstate_size_legacy(void)
fpu_kernel_cfg.default_size = size;
fpu_user_cfg.max_size = size;
fpu_user_cfg.default_size = size;
- fpstate_reset(x86_task_fpu(current));
}

/*
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 90b11671e943..1f37da22ddbe 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -844,9 +844,6 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
if (err)
goto out_disable;

- /* Reset the state for the current task */
- fpstate_reset(x86_task_fpu(current));
-
/*
* Update info used for ptrace frames; use standard-format size and no
* supervisor xstates:
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 226244a894da..3509afc6a672 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -170,10 +170,6 @@ SECTIONS
/* equivalent to task_pt_regs(&init_task) */
__top_init_kernel_stack = __end_init_stack - TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE;

- __x86_init_fpu_begin = .;
- . = __x86_init_fpu_begin + 128*PAGE_SIZE;
- __x86_init_fpu_end = .;
-
#ifdef CONFIG_X86_32
/* 32 bit has nosave before _edata */
NOSAVE_DATA