[PATCH 11/11] arm64: percpu: use local percpu for this_cpu_*() APIs

From: Yang Shi

Date: Wed Apr 29 2026 - 13:16:43 EST


Use the local percpu address for the this_cpu_*() APIs. Because percpu
variables are mapped at the same virtual address on every CPU, their
addresses can be calculated using __per_cpu_local_off, which has the
same value on all CPUs. The computed address therefore does not depend
on which CPU the task is running on, so preempt_disable()/
preempt_enable() is no longer needed around the operation. This
optimization improves the performance of this_cpu_*() operations.

A kernel build test on AmpereOne (160 cores), run in a memcg with the
default Fedora kernel config, showed roughly a 13% - 15% improvement in
sys time.

Signed-off-by: Yang Shi <yang@xxxxxxxxxxxxxxxxxxxxxx>
---
arch/arm64/include/asm/percpu.h | 17 ++++++++++-------
1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
index b57b2bb00967..15db56f981de 100644
--- a/arch/arm64/include/asm/percpu.h
+++ b/arch/arm64/include/asm/percpu.h
@@ -12,6 +12,7 @@
#include <asm/stack_pointer.h>
#include <asm/sysreg.h>

+extern unsigned long __per_cpu_local_off;
static inline void set_my_cpu_offset(unsigned long off)
{
asm volatile(ALTERNATIVE("msr tpidr_el1, %0",
@@ -153,19 +154,21 @@ PERCPU_RET_OP(add, add, ldadd)
* disabled.
*/

+#define local_cpu_ptr(ptr) \
+({ \
+ __verify_pcpu_ptr(ptr); \
+ SHIFT_PERCPU_PTR(ptr, __per_cpu_local_off); \
+})
+
#define _pcp_protect(op, pcp, ...) \
({ \
- preempt_disable_notrace(); \
- op(raw_cpu_ptr(&(pcp)), __VA_ARGS__); \
- preempt_enable_notrace(); \
+ op(local_cpu_ptr(&(pcp)), __VA_ARGS__); \
})

#define _pcp_protect_return(op, pcp, args...) \
({ \
typeof(pcp) __retval; \
- preempt_disable_notrace(); \
- __retval = (typeof(pcp))op(raw_cpu_ptr(&(pcp)), ##args); \
- preempt_enable_notrace(); \
+ __retval = (typeof(pcp))op(local_cpu_ptr(&(pcp)), ##args); \
__retval; \
})

@@ -251,7 +254,7 @@ PERCPU_RET_OP(add, add, ldadd)
old__ = o; \
new__ = n; \
preempt_disable_notrace(); \
- ptr__ = raw_cpu_ptr(&(pcp)); \
+ ptr__ = local_cpu_ptr(&(pcp)); \
ret__ = cmpxchg128_local((void *)ptr__, old__, new__); \
preempt_enable_notrace(); \
ret__; \
--
2.47.0