Re: [PATCH -tip] x86/stackprotector: Move stack canary to struct pcpu_hot

From: Brian Gerst
Date: Fri Feb 21 2025 - 08:38:55 EST


On Fri, Feb 21, 2025 at 8:25 AM Uros Bizjak <ubizjak@xxxxxxxxx> wrote:
>
> On Fri, Feb 21, 2025 at 1:54 PM Brian Gerst <brgerst@xxxxxxxxx> wrote:
> >
> > On Thu, Feb 20, 2025 at 3:04 PM Uros Bizjak <ubizjak@xxxxxxxxx> wrote:
> > >
> > > Move stack canary from __stack_chk_guard to struct pcpu_hot and
> > > alias __stack_chk_guard to point to the new location in the
> > > linker script.
> > >
> > > __stack_chk_guard is one of the hottest data structures on x86, so
> > > moving it there makes sense even if its benefit cannot be measured
> > > explicitly.
> > >
> > > Signed-off-by: Uros Bizjak <ubizjak@xxxxxxxxx>
> > > Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> > > Cc: Ingo Molnar <mingo@xxxxxxxxxx>
> > > Cc: Borislav Petkov <bp@xxxxxxxxx>
> > > Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
> > > Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
> > > Cc: Brian Gerst <brgerst@xxxxxxxxx>
> > > Cc: Ard Biesheuvel <ardb@xxxxxxxxxx>
> > > ---
> > > arch/x86/include/asm/current.h | 13 +++++++++++++
> > > arch/x86/kernel/cpu/common.c | 1 -
> > > arch/x86/kernel/vmlinux.lds.S | 2 ++
> > > 3 files changed, 15 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
> > > index bf5953883ec3..e4ff1d15b465 100644
> > > --- a/arch/x86/include/asm/current.h
> > > +++ b/arch/x86/include/asm/current.h
> > > @@ -15,6 +15,9 @@ struct task_struct;
> > > struct pcpu_hot {
> > > union {
> > > struct {
> > > +#ifdef CONFIG_STACKPROTECTOR
> > > + unsigned long stack_canary;
> > > +#endif
> > > struct task_struct *current_task;
> > > int preempt_count;
> > > int cpu_number;
> > > @@ -35,6 +38,16 @@ struct pcpu_hot {
> > > };
> > > static_assert(sizeof(struct pcpu_hot) == 64);
> > >
> > > +/*
> > > + * stack_canary should be at the beginning of struct pcpu_hot to avoid:
> > > + *
> > > + * Invalid absolute R_X86_64_32S relocation: __stack_chk_guard
> >
> > This should be R_X86_64_PC32 relocations.
>
> This is what the build system reports if any offset (including 0) is added to
>
> PROVIDE(__stack_chk_guard = pcpu_hot);
>
> It is not a warning, but an error that fails the build.
>
> As was discussed in the previous thread, the above is required to
> handle !SMP case, where mstack-protector-guard=global (used by !SMP
> builds) ignores the
> -mstack-protector-guard-symbol option and always uses __stack_chk_guard.

I got a warning from the relocs tool, but not a hard error. What
compiler/linker are you using?

Does the attached patch build in your configuration?


Brian Gerst
From ee30f3a936e641978c965d82990c11792b598fd8 Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@xxxxxxxxx>
Date: Wed, 19 Feb 2025 12:38:02 -0500
Subject: [PATCH] x86/stackprotector: Move canary to pcpu_hot

The stack protector canary value is frequently accessed on function
entry and exit, so move it to the pcpu_hot struct where it will have
fast access.

In order to keep non-SMP builds working, rename __ref_stack_chk_guard to
__stack_chk_guard.

Signed-off-by: Brian Gerst <brgerst@xxxxxxxxx>
---
arch/x86/Makefile | 2 +-
arch/x86/entry/entry.S | 2 +-
arch/x86/entry/entry_32.S | 2 +-
arch/x86/entry/entry_64.S | 2 +-
arch/x86/include/asm/asm-prototypes.h | 2 +-
arch/x86/include/asm/current.h | 3 +++
arch/x86/include/asm/stackprotector.h | 7 +++----
arch/x86/kernel/asm-offsets.c | 4 ++++
arch/x86/kernel/cpu/common.c | 8 --------
arch/x86/kernel/module.c | 2 +-
arch/x86/kernel/vmlinux.lds.S | 4 +++-
arch/x86/tools/relocs.c | 1 +
12 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 88a1705366f9..cc619d31ccfe 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -197,7 +197,7 @@ endif
ifeq ($(CONFIG_STACKPROTECTOR),y)
ifeq ($(CONFIG_SMP),y)
KBUILD_CFLAGS += -mstack-protector-guard-reg=$(percpu_seg)
- KBUILD_CFLAGS += -mstack-protector-guard-symbol=__ref_stack_chk_guard
+ KBUILD_CFLAGS += -mstack-protector-guard-symbol=__stack_chk_guard
else
KBUILD_CFLAGS += -mstack-protector-guard=global
endif
diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S
index fe5344a249a1..d779381a067c 100644
--- a/arch/x86/entry/entry.S
+++ b/arch/x86/entry/entry.S
@@ -63,5 +63,5 @@ THUNK warn_thunk_thunk, __warn_thunk
* instead.
*/
#ifdef CONFIG_STACKPROTECTOR
-EXPORT_SYMBOL(__ref_stack_chk_guard);
+EXPORT_SYMBOL(__stack_chk_guard);
#endif
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 20be5758c2d2..940efc07f2c1 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -691,7 +691,7 @@ SYM_CODE_START(__switch_to_asm)

#ifdef CONFIG_STACKPROTECTOR
movl TASK_stack_canary(%edx), %ebx
- movl %ebx, PER_CPU_VAR(__stack_chk_guard)
+ movl %ebx, PER_CPU_VAR(pcpu_hot + X86_stack_canary)
#endif

/*
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 33a955aa01d8..ed43559d66dc 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -192,7 +192,7 @@ SYM_FUNC_START(__switch_to_asm)

#ifdef CONFIG_STACKPROTECTOR
movq TASK_stack_canary(%rsi), %rbx
- movq %rbx, PER_CPU_VAR(__stack_chk_guard)
+ movq %rbx, PER_CPU_VAR(pcpu_hot + X86_stack_canary)
#endif

/*
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index 3674006e3974..43977c74b22f 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -21,5 +21,5 @@ extern void cmpxchg8b_emu(void);
#endif

#if defined(__GENKSYMS__) && defined(CONFIG_STACKPROTECTOR)
-extern unsigned long __ref_stack_chk_guard;
+extern unsigned long __stack_chk_guard;
#endif
diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index bf5953883ec3..cf0ce44c1d95 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -28,6 +28,9 @@ struct pcpu_hot {
bool hardirq_stack_inuse;
#else
void *softirq_stack_ptr;
+#endif
+#ifdef CONFIG_STACKPROTECTOR
+ unsigned long stack_canary;
#endif
};
u8 pad[64];
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index d43fb589fcf6..4142c88390d3 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -17,11 +17,10 @@
#include <asm/processor.h>
#include <asm/percpu.h>
#include <asm/desc.h>
+#include <asm/current.h>

#include <linux/sched.h>

-DECLARE_PER_CPU(unsigned long, __stack_chk_guard);
-
/*
* Initialize the stackprotector canary value.
*
@@ -38,12 +37,12 @@ static __always_inline void boot_init_stack_canary(void)
unsigned long canary = get_random_canary();

current->stack_canary = canary;
- this_cpu_write(__stack_chk_guard, canary);
+ this_cpu_write(pcpu_hot.stack_canary, canary);
}

static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle)
{
- per_cpu(__stack_chk_guard, cpu) = idle->stack_canary;
+ per_cpu(pcpu_hot.stack_canary, cpu) = idle->stack_canary;
}

#else /* STACKPROTECTOR */
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index a98020bf31bb..4c02f5d247f2 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -112,6 +112,10 @@ static void __used common(void)
#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING
OFFSET(X86_call_depth, pcpu_hot, call_depth);
#endif
+#ifdef CONFIG_STACKPROTECTOR
+ OFFSET(X86_stack_canary, pcpu_hot, stack_canary);
+#endif
+
#if IS_ENABLED(CONFIG_CRYPTO_ARIA_AESNI_AVX_X86_64)
/* Offset for fields in aria_ctx */
BLANK();
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 8b49b1338f76..1735e4a11d9e 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -24,7 +24,6 @@
#include <linux/io.h>
#include <linux/syscore_ops.h>
#include <linux/pgtable.h>
-#include <linux/stackprotector.h>
#include <linux/utsname.h>

#include <asm/alternative.h>
@@ -2087,13 +2086,6 @@ void syscall_init(void)
}
#endif /* CONFIG_X86_64 */

-#ifdef CONFIG_STACKPROTECTOR
-DEFINE_PER_CPU(unsigned long, __stack_chk_guard);
-#ifndef CONFIG_SMP
-EXPORT_PER_CPU_SYMBOL(__stack_chk_guard);
-#endif
-#endif
-
/*
* Clear all 6 debug registers:
*/
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index a286f32c5503..fbadc78957df 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -134,7 +134,7 @@ static int __write_relocate_add(Elf64_Shdr *sechdrs,
#if defined(CONFIG_STACKPROTECTOR) && \
defined(CONFIG_CC_IS_CLANG) && CONFIG_CLANG_VERSION < 170000
case R_X86_64_REX_GOTPCRELX: {
- static unsigned long __percpu *const addr = &__stack_chk_guard;
+ static unsigned long __percpu *const addr = &pcpu_hot.stack_canary;

if (sym->st_value != (u64)addr) {
pr_err("%s: Unsupported GOTPCREL relocation\n", me->name);
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 1769a7126224..cb0ab873616e 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -467,8 +467,10 @@ SECTIONS
. = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
"kernel image bigger than KERNEL_IMAGE_SIZE");

+#ifdef CONFIG_STACKPROTECTOR
/* needed for Clang - see arch/x86/entry/entry.S */
-PROVIDE(__ref_stack_chk_guard = __stack_chk_guard);
+__stack_chk_guard = pcpu_hot + X86_stack_canary;
+#endif

#ifdef CONFIG_X86_64

diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 5778bc498415..bf612f1f8c8a 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -89,6 +89,7 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = {
"__end_rodata_aligned|"
"__initramfs_start|"
"(jiffies|jiffies_64)|"
+ "__stack_chk_guard|"
#if ELF_BITS == 64
"__end_rodata_hpage_align|"
#endif

base-commit: 01157ddc58dc2fe428ec17dd5a18cc13f134639f
--
2.48.1