Re: [PATCH v6 00/15] x86-64: Stack protector and percpu improvements
From: Uros Bizjak
Date: Thu Feb 20 2025 - 04:53:51 EST
On Wed, Feb 19, 2025 at 2:18 PM Brian Gerst <brgerst@xxxxxxxxx> wrote:
>
> On Wed, Feb 19, 2025 at 6:47 AM Ingo Molnar <mingo@xxxxxxxxxx> wrote:
> >
> >
> > * Uros Bizjak <ubizjak@xxxxxxxxx> wrote:
> >
> > > > Thank you for doing this series - it all looks pretty good from my
> > > > side and I've applied it experimentally to tip:x86/asm. I fixed up
> > > > the trivial details other reviewers and me noticed.
> > > >
> > > > Note that the merge is tentative, it might still need a rebase if
> > > > some fundamental problem comes up - but let's see how testing goes
> > > > in -next.
> > >
> > > I wonder if there would be any benefit if stack canary is put into
> > > struct pcpu_hot?
> >
> > It should definitely be one of the hottest data structures on x86, so
> > moving it there makes sense even if it cannot be measured explicitly.
> >
>
> It would have to be done with linker tricks, since you can't make the
> compiler use a struct member directly.
Something like the attached patch?
It boots and runs without problems.
However, when building the kernel, I get "Absolute relocations
present" warning with thousands of locations:
RELOCS arch/x86/boot/compressed/vmlinux.relocs
WARNING: Absolute relocations present
Offset Info Type Sym.Value Sym.Name
ffffffff81200826 0003259e00000002 R_X86_64_PC32 ffffffff8350f620
__ref_stack_chk_guard
ffffffff81201493 0003259e00000002 R_X86_64_PC32 ffffffff8350f620
__ref_stack_chk_guard
ffffffff81201714 0003259e00000002 R_X86_64_PC32 ffffffff8350f620
__ref_stack_chk_guard
ffffffff81201d66 0003259e00000002 R_X86_64_PC32 ffffffff8350f620
__ref_stack_chk_guard
...
ffffffff834e2a13 0003259e00000002 R_X86_64_PC32 ffffffff8350f620
__ref_stack_chk_guard
ffffffff834e2a6a 0003259e00000002 R_X86_64_PC32 ffffffff8350f620
__ref_stack_chk_guard
RSTRIP vmlinux
which I don't understand. Looking at the first one:
ffffffff8120081d <force_ibs_eilvt_setup.cold>:
ffffffff8120081d: 48 8b 44 24 08 mov 0x8(%rsp),%rax
ffffffff81200822: 65 48 2b 05 f6 ed 30 sub
%gs:0x230edf6(%rip),%rax # ffffffff8350f620
<__ref_stack_chk_guard>
ffffffff81200829: 02
I don't think this is absolute relocation, see (%rip).
The kernel was compiled with gcc-14.2.1, so clang specific issue was not tested.
Uros.
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 20be5758c2d2..940efc07f2c1 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -691,7 +691,7 @@ SYM_CODE_START(__switch_to_asm)
#ifdef CONFIG_STACKPROTECTOR
movl TASK_stack_canary(%edx), %ebx
- movl %ebx, PER_CPU_VAR(__stack_chk_guard)
+ movl %ebx, PER_CPU_VAR(pcpu_hot + X86_stack_canary)
#endif
/*
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 49d3b222fe99..4f4c0cf4963f 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -193,7 +193,7 @@ SYM_FUNC_START(__switch_to_asm)
#ifdef CONFIG_STACKPROTECTOR
movq TASK_stack_canary(%rsi), %rbx
- movq %rbx, PER_CPU_VAR(__stack_chk_guard)
+ movq %rbx, PER_CPU_VAR(pcpu_hot + X86_stack_canary)
#endif
/*
diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index bf5953883ec3..a4d515cd6a31 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -22,6 +22,9 @@ struct pcpu_hot {
u64 call_depth;
#endif
unsigned long top_of_stack;
+#ifdef CONFIG_STACKPROTECTOR
+ unsigned long stack_canary;
+#endif
void *hardirq_stack_ptr;
u16 softirq_pending;
#ifdef CONFIG_X86_64
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index d43fb589fcf6..5e5229ac1c46 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -20,8 +20,6 @@
#include <linux/sched.h>
-DECLARE_PER_CPU(unsigned long, __stack_chk_guard);
-
/*
* Initialize the stackprotector canary value.
*
@@ -38,12 +36,12 @@ static __always_inline void boot_init_stack_canary(void)
unsigned long canary = get_random_canary();
current->stack_canary = canary;
- this_cpu_write(__stack_chk_guard, canary);
+ this_cpu_write(pcpu_hot.stack_canary, canary);
}
static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle)
{
- per_cpu(__stack_chk_guard, cpu) = idle->stack_canary;
+ per_cpu(pcpu_hot.stack_canary, cpu) = idle->stack_canary;
}
#else /* STACKPROTECTOR */
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index a98020bf31bb..59e8b294cbdc 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -109,6 +109,9 @@ static void __used common(void)
OFFSET(TSS_sp2, tss_struct, x86_tss.sp2);
OFFSET(X86_top_of_stack, pcpu_hot, top_of_stack);
OFFSET(X86_current_task, pcpu_hot, current_task);
+#ifdef CONFIG_STACKPROTECTOR
+ OFFSET(X86_stack_canary, pcpu_hot, stack_canary);
+#endif
#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING
OFFSET(X86_call_depth, pcpu_hot, call_depth);
#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 21078907af57..f30d6a9c4abd 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -24,7 +24,6 @@
#include <linux/io.h>
#include <linux/syscore_ops.h>
#include <linux/pgtable.h>
-#include <linux/stackprotector.h>
#include <linux/utsname.h>
#include <asm/alternative.h>
@@ -2087,13 +2086,6 @@ void syscall_init(void)
}
#endif /* CONFIG_X86_64 */
-#ifdef CONFIG_STACKPROTECTOR
-DEFINE_PER_CPU(unsigned long, __stack_chk_guard);
-#ifndef CONFIG_SMP
-EXPORT_PER_CPU_SYMBOL(__stack_chk_guard);
-#endif
-#endif
-
/*
* Clear all 6 debug registers:
*/
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 1769a7126224..a35e4ebe032e 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -468,7 +468,7 @@ SECTIONS
"kernel image bigger than KERNEL_IMAGE_SIZE");
/* needed for Clang - see arch/x86/entry/entry.S */
-PROVIDE(__ref_stack_chk_guard = __stack_chk_guard);
+PROVIDE(__ref_stack_chk_guard = pcpu_hot + X86_stack_canary);
#ifdef CONFIG_X86_64