Re: [PATCH 10/9] x86/fpu: Fix 'struct fpu' misalignment on 32-bit kernels

From: Oleg Nesterov
Date: Sun Jun 16 2024 - 06:57:46 EST


On 06/15, Oleg Nesterov wrote:
>
> So perhaps we can (later) change x86_task_fpu(), fpu_clone(), and
> fpu__init_task_struct_size() to use
>
> ALIGN(sizeof(struct task_struct), 64)
>
> and remove the alignment attribute in sched.h?

On second thought, perhaps this makes sense from the very beginning?
See the patch below, up to you.

> Or use ARCH_MIN_TASKALIGN == __alignof__(union fpregs_state) which is
> also used in fork_init()->kmem_cache_create().

Either way, I hope that CONFIG_X86_VSMP can't define ARCH_MIN_TASKALIGN
to be less than __alignof__(union fpregs_state).

Oleg.
---

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 64509c7f26c8..7887e9493330 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -507,6 +507,9 @@ struct thread_struct {
struct fpu *fpu;
};

+#define X86_TASK_SIZE \
+ ALIGN(sizeof(struct task_struct), __alignof__(union fpregs_state))
+
#define x86_task_fpu(task) ((task)->thread.fpu)

/*
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index f0c4367804b3..613198372764 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -591,7 +591,7 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal,
* This is safe because task_struct size is a multiple of cacheline size.
*/
struct fpu *src_fpu = x86_task_fpu(current);
- struct fpu *dst_fpu = (void *)dst + sizeof(*dst);
+ struct fpu *dst_fpu = (void *)dst + X86_TASK_SIZE;

BUILD_BUG_ON(sizeof(*dst) % SMP_CACHE_BYTES != 0);
BUG_ON(!src_fpu);
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 4e8d37b5a90b..8b43c83b82c7 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -71,16 +71,14 @@ static bool __init fpu__probe_without_cpuid(void)
return fsw == 0 && (fcw & 0x103f) == 0x003f;
}

-static struct fpu x86_init_fpu __read_mostly;
+static struct fpu x86_init_fpu __aligned(64) __read_mostly;

static void __init fpu__init_system_early_generic(void)
{
- int this_cpu = smp_processor_id();
-
fpstate_reset(&x86_init_fpu);
current->thread.fpu = &x86_init_fpu;
- per_cpu(fpu_fpregs_owner_ctx, this_cpu) = &x86_init_fpu;
- x86_init_fpu.last_cpu = this_cpu;
+ set_thread_flag(TIF_NEED_FPU_LOAD);
+ x86_init_fpu.last_cpu = -1;

if (!boot_cpu_has(X86_FEATURE_CPUID) &&
!test_bit(X86_FEATURE_FPU, (unsigned long *)cpu_caps_cleared)) {
@@ -157,7 +155,7 @@ static void __init fpu__init_system_generic(void)
*/
static void __init fpu__init_task_struct_size(void)
{
- int task_size = sizeof(struct task_struct);
+ int task_size = X86_TASK_SIZE;

task_size += sizeof(struct fpu);