Re: Bad FPU state detected at restore_fpregs_from_fpstate, reinitializing FPU registers. WARNING: at arch/x86/mm/extable.c:127 fixup_exception

From: Dave Hansen
Date: Thu Jun 13 2024 - 15:43:47 EST


Does this fix it?

I think moving the explicit 'struct fpu' out of task_struct took the
knowledge away from the compiler on how to keep the XSAVE buffer
aligned. Once that happened, we ended up with unaligned XSAVE
operations and bad things happened.

Also, open-coding "task + sizeof(*task)" in three different places seems
suboptimal.


---

b/arch/x86/include/asm/processor.h | 7 ++++++-
b/arch/x86/kernel/fpu/core.c | 11 +++--------
b/arch/x86/kernel/fpu/init.c | 2 +-
3 files changed, 10 insertions(+), 10 deletions(-)

diff -puN arch/x86/kernel/fpu/core.c~debug-32bit-fpregs arch/x86/kernel/fpu/core.c
--- a/arch/x86/kernel/fpu/core.c~debug-32bit-fpregs 2024-06-13 10:39:30.819589794 -0700
+++ b/arch/x86/kernel/fpu/core.c 2024-06-13 12:37:46.166450575 -0700
@@ -57,7 +57,7 @@ struct fpu *x86_task_fpu(struct task_str
if (WARN_ON_ONCE(task->flags & PF_KTHREAD))
return NULL;

- return (void *)task + sizeof(*task);
+ return __x86_task_fpu(task);
}
#endif

@@ -594,13 +594,8 @@ static int update_fpu_shstk(struct task_
int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal,
unsigned long ssp)
{
- /*
- * We allocate the new FPU structure right after the end of the task struct.
- * task allocation size already took this into account.
- *
- * This is safe because task_struct size is a multiple of cacheline size.
- */
- struct fpu *dst_fpu = (void *)dst + sizeof(*dst);
+ /* 'dst' is partially built, disable debugging: */
+ struct fpu *dst_fpu = __x86_task_fpu(dst);

BUILD_BUG_ON(sizeof(*dst) % SMP_CACHE_BYTES != 0);

diff -puN arch/x86/include/asm/processor.h~debug-32bit-fpregs arch/x86/include/asm/processor.h
--- a/arch/x86/include/asm/processor.h~debug-32bit-fpregs 2024-06-13 11:09:19.508181013 -0700
+++ b/arch/x86/include/asm/processor.h 2024-06-13 12:36:24.480086965 -0700
@@ -504,10 +504,15 @@ struct thread_struct {
#endif
};

+/* XSAVE requrires 64-byte alignment: */
+#define fpu_align(x) ALIGN(x, 64)
+
+#define __x86_task_fpu(task) ((struct fpu *)((void *)(task) + fpu_align(sizeof(*(task)))))
+
#ifdef CONFIG_X86_DEBUG_FPU
extern struct fpu *x86_task_fpu(struct task_struct *task);
#else
-# define x86_task_fpu(task) ((struct fpu *)((void *)(task) + sizeof(*(task))))
+#define x86_task_fpu(task) __x86_task_fpu(task)
#endif

/*
diff -puN arch/x86/kernel/fpu/init.c~debug-32bit-fpregs arch/x86/kernel/fpu/init.c
--- a/arch/x86/kernel/fpu/init.c~debug-32bit-fpregs 2024-06-13 11:55:22.597114454 -0700
+++ b/arch/x86/kernel/fpu/init.c 2024-06-13 12:39:15.924492560 -0700
@@ -158,7 +158,7 @@ static void __init fpu__init_task_struct
{
int task_size = sizeof(struct task_struct);

- task_size += sizeof(struct fpu);
+ task_size += fpu_align(sizeof(struct fpu));

/*
* Subtract off the static size of the register state.
_