[PATCH 4/9] x86/fpu: Remove the thread::fpu pointer

From: Ingo Molnar
Date: Sat Jun 08 2024 - 03:32:51 EST


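As suggested by Oleg, remove the thread::fpu pointer: x86_task_fpu()
can derive the FPU context address directly from the task pointer, by
adding an offset (the size of the task struct) that is known at
compile-time.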

This improves code generation a bit:

kepler:~/tip> size vmlinux.before vmlinux.after
    text      data      bss       dec      hex  filename
26475405  10435342  1740804  38651551  24dc69f  vmlinux.before
26475339  10959630  1216516  38651485  24dc65d  vmlinux.after

Suggested-by: Oleg Nesterov <oleg@xxxxxxxxxx>
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Andy Lutomirski <luto@xxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxxxx>
Cc: Fenghua Yu <fenghua.yu@xxxxxxxxx>
Cc: H. Peter Anvin <hpa@xxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Uros Bizjak <ubizjak@xxxxxxxxx>
Link: https://lore.kernel.org/r/20240605083557.2051480-3-mingo@xxxxxxxxxx
---
arch/x86/include/asm/processor.h | 5 +----
arch/x86/kernel/fpu/core.c | 4 +---
arch/x86/kernel/fpu/init.c | 1 -
arch/x86/kernel/process.c | 2 --
arch/x86/kernel/vmlinux.lds.S | 4 ++++
5 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 64509c7f26c8..3de609aad0af 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -502,12 +502,9 @@ struct thread_struct {

struct thread_shstk shstk;
#endif
-
- /* Floating point and extended processor state */
- struct fpu *fpu;
};

-#define x86_task_fpu(task) ((task)->thread.fpu)
+#define x86_task_fpu(task) ((struct fpu *)((void *)(task) + sizeof(*(task))))

/*
* X86 doesn't need any embedded-FPU-struct quirks:
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index f0c4367804b3..167a9c7ed6d3 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -591,13 +591,11 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal,
* This is safe because task_struct size is a multiple of cacheline size.
*/
struct fpu *src_fpu = x86_task_fpu(current);
- struct fpu *dst_fpu = (void *)dst + sizeof(*dst);
+ struct fpu *dst_fpu = x86_task_fpu(dst);

BUILD_BUG_ON(sizeof(*dst) % SMP_CACHE_BYTES != 0);
BUG_ON(!src_fpu);

- dst->thread.fpu = dst_fpu;
-
/* The new task's FPU state cannot be valid in the hardware. */
dst_fpu->last_cpu = -1;

diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 4e8d37b5a90b..794682b52373 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -78,7 +78,6 @@ static void __init fpu__init_system_early_generic(void)
int this_cpu = smp_processor_id();

fpstate_reset(&x86_init_fpu);
- current->thread.fpu = &x86_init_fpu;
per_cpu(fpu_fpregs_owner_ctx, this_cpu) = &x86_init_fpu;
x86_init_fpu.last_cpu = this_cpu;

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 5bb73bc0e31a..4184c085627e 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -96,8 +96,6 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
#ifdef CONFIG_VM86
dst->thread.vm86 = NULL;
#endif
- /* Drop the copied pointer to current's fpstate */
- dst->thread.fpu = NULL;

return 0;
}
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 3509afc6a672..226244a894da 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -170,6 +170,10 @@ SECTIONS
/* equivalent to task_pt_regs(&init_task) */
__top_init_kernel_stack = __end_init_stack - TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE;

+ __x86_init_fpu_begin = .;
+ . = __x86_init_fpu_begin + 128*PAGE_SIZE;
+ __x86_init_fpu_end = .;
+
#ifdef CONFIG_X86_32
/* 32 bit has nosave before _edata */
NOSAVE_DATA
--
2.43.0