Re: 8aeb879baf12 - significant system call latency regression, bisected
From: Peter Zijlstra
Date: Fri Jun 19 2026 - 04:14:44 EST
On Wed, Jun 17, 2026 at 02:37:18PM +0200, Peter Zijlstra wrote:
> - makes -fno-jump-tables unconditional
> - removes array_index_nospec() from the syscall dispatch
FWIW, this also allows making all SYSCALLs __noendbr, very much
including the 'legacy' sys_call_table :-)
Compile tested with IA32_EMULATION=n and reliably yields:
vmlinux.o: warning: objtool: sys_call_table+0x0: data relocation to !ENDBR: __x64_sys_read+0x0
...
vmlinux.o: warning: objtool: sys_call_table+0xeb0: data relocation to !ENDBR: __x64_sys_listns+0x0
(which is just one little objtool patch away from being fixed)
and boots fine (in kvm).
---
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 598f178102ee..b154a2a20eb2 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -90,17 +90,8 @@ CC_FLAGS_FPU += -mhard-float
endif
ifeq ($(CONFIG_X86_KERNEL_IBT),y)
-#
-# Kernel IBT has S_CET.NOTRACK_EN=0, as such the compilers must not generate
-# NOTRACK prefixes. Current generation compilers unconditionally employ NOTRACK
-# for jump-tables, as such, disable jump-tables for now.
-#
-# (jump-tables are implicitly disabled by RETPOLINE)
-#
-# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104816
-#
-KBUILD_CFLAGS += $(call cc-option,-fcf-protection=branch -fno-jump-tables)
-KBUILD_RUSTFLAGS += -Zcf-protection=branch $(if $(call rustc-min-version,109300),-Cjump-tables=n,-Zno-jump-tables)
+KBUILD_CFLAGS += $(call cc-option,-fcf-protection=branch)
+KBUILD_RUSTFLAGS += -Zcf-protection=branch
else
KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none)
endif
@@ -173,6 +164,13 @@ endif
KBUILD_RUSTFLAGS += -Ccode-model=kernel
percpu_seg := gs
+
+ # Due to retpolines and cf-protection=branch's implicit NOTRACK usage
+ # for jump-tables, blanket disable jump-tables for all x86_64 builds to
+ # get a consistent behaviour across configurations. This allows
+ # removing some array_index_nospec() usage.
+ KBUILD_CFLAGS += -fno-jump-tables
+ KBUILD_RUSTFLAGS += $(if $(call rustc-min-version,109300),-Cjump-tables=n,-Zno-jump-tables)
endif
ifeq ($(CONFIG_STACKPROTECTOR),y)
@@ -209,15 +207,6 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
ifdef CONFIG_MITIGATION_RETPOLINE
KBUILD_CFLAGS += $(RETPOLINE_CFLAGS)
KBUILD_RUSTFLAGS += $(RETPOLINE_RUSTFLAGS)
- # Additionally, avoid generating expensive indirect jumps which
- # are subject to retpolines for small number of switch cases.
- # LLVM turns off jump table generation by default when under
- # retpoline builds, however, gcc does not for x86. This has
- # only been fixed starting from gcc stable version 8.4.0 and
- # onwards, but not for older ones. See gcc bug #86952.
- ifndef CONFIG_CC_IS_CLANG
- KBUILD_CFLAGS += -fno-jump-tables
- endif
endif
ifdef CONFIG_MITIGATION_SLS
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index 71f032504e73..7e87947e12be 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -8,9 +8,10 @@
#include <linux/entry-common.h>
#include <linux/nospec.h>
#include <asm/syscall.h>
+#include <asm/ibt.h>
-#define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *);
-#define __SYSCALL_NORETURN(nr, sym) extern long __noreturn __x64_##sym(const struct pt_regs *);
+#define __SYSCALL(nr, sym) extern __noendbr long __x64_##sym(const struct pt_regs *);
+#define __SYSCALL_NORETURN(nr, sym) extern __noendbr long __noreturn __x64_##sym(const struct pt_regs *);
#include <asm/syscalls_64.h>
#ifdef CONFIG_X86_X32_ABI
#include <asm/syscalls_x32.h>
@@ -25,30 +26,47 @@
* kernel/trace/trace_syscalls.c still wants to know the system
* call address.
*/
-#define __SYSCALL(nr, sym) __x64_##sym,
+#define __SYSCALL(nr, sym) (void *)&__x64_##sym,
const sys_call_ptr_t sys_call_table[] = {
#include <asm/syscalls_64.h>
};
#undef __SYSCALL
#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
-long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
+static noinstr long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
{
+ /*
+ * Because -fno-jump-tables, this compiles into a binary branch tree
+ * rather than a jump-table. As such @nr is not used as an array
+ * index. Additionally, this is an out-of-line function on purpose,
+ * such that all the actual syscall function calls are tail-calls,
+ * returning to our caller for the common bits.
+ */
+ instrumentation_begin();
switch (nr) {
#include <asm/syscalls_64.h>
default: return __x64_sys_ni_syscall(regs);
}
+ instrumentation_end();
}
#ifdef CONFIG_X86_X32_ABI
-long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
+static noinstr long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
{
+ instrumentation_begin();
switch (nr) {
#include <asm/syscalls_x32.h>
default: return __x64_sys_ni_syscall(regs);
}
+ instrumentation_end();
+}
+#else
+static __always_inline long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
+{
+ return __x64_sys_ni_syscall(regs);
}
#endif
+#undef __SYSCALL
static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)
{
@@ -59,7 +77,6 @@ static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)
unsigned int unr = nr;
if (likely(unr < NR_syscalls)) {
- unr = array_index_nospec(unr, NR_syscalls);
regs->ax = x64_sys_call(regs, unr);
return true;
}
@@ -76,7 +93,6 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
unsigned int xnr = nr - __X32_SYSCALL_BIT;
if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) {
- xnr = array_index_nospec(xnr, X32_NR_syscalls);
regs->ax = x32_sys_call(regs, xnr);
return true;
}
@@ -84,7 +100,7 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
}
/* Returns true to return using SYSRET, or false to use IRET */
-__visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
+__visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
{
nr = syscall_enter_from_user_mode(regs, nr);
diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h
index 7e88705e907f..1639fbc02680 100644
--- a/arch/x86/include/asm/syscall_wrapper.h
+++ b/arch/x86/include/asm/syscall_wrapper.h
@@ -7,9 +7,10 @@
#define _ASM_X86_SYSCALL_WRAPPER_H
#include <asm/ptrace.h>
+#include <asm/ibt.h>
-extern long __x64_sys_ni_syscall(const struct pt_regs *regs);
-extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
+extern __noendbr long __x64_sys_ni_syscall(const struct pt_regs *regs);
+extern __noendbr long __ia32_sys_ni_syscall(const struct pt_regs *regs);
/*
* Instead of the generic __SYSCALL_DEFINEx() definition, the x86 version takes
@@ -83,15 +84,15 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
__MAP(x, __SC_TYPE, __VA_ARGS__)) \
#define __SYS_STUB0(abi, name) \
- long __##abi##_##name(const struct pt_regs *regs); \
+ long __noendbr __##abi##_##name(const struct pt_regs *regs); \
ALLOW_ERROR_INJECTION(__##abi##_##name, ERRNO); \
- long __##abi##_##name(const struct pt_regs *regs) \
+ long __noendbr __##abi##_##name(const struct pt_regs *regs) \
__alias(__do_##name);
#define __SYS_STUBx(abi, name, ...) \
- long __##abi##_##name(const struct pt_regs *regs); \
+ long __noendbr __##abi##_##name(const struct pt_regs *regs); \
ALLOW_ERROR_INJECTION(__##abi##_##name, ERRNO); \
- long __##abi##_##name(const struct pt_regs *regs) \
+ long __noendbr __##abi##_##name(const struct pt_regs *regs) \
{ \
return __se_##name(__VA_ARGS__); \
}
@@ -257,8 +258,8 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
* For VSYSCALLS, we need to declare these three syscalls with the new
* pt_regs-based calling convention for in-kernel use.
*/
-long __x64_sys_getcpu(const struct pt_regs *regs);
-long __x64_sys_gettimeofday(const struct pt_regs *regs);
-long __x64_sys_time(const struct pt_regs *regs);
+long __noendbr __x64_sys_getcpu(const struct pt_regs *regs);
+long __noendbr __x64_sys_gettimeofday(const struct pt_regs *regs);
+long __noendbr __x64_sys_time(const struct pt_regs *regs);
#endif /* _ASM_X86_SYSCALL_WRAPPER_H */