[tip: x86/asm] x86/percpu: Do not clobber %rsi in percpu_{try_,}cmpxchg{64,128}_op

From: tip-bot2 for Uros Bizjak
Date: Thu Sep 21 2023 - 17:13:43 EST


The following commit has been merged into the x86/asm branch of tip:

Commit-ID: 7c097ca50d2ba7f7989f01175f366151256bfa10
Gitweb: https://git.kernel.org/tip/7c097ca50d2ba7f7989f01175f366151256bfa10
Author: Uros Bizjak <ubizjak@xxxxxxxxx>
AuthorDate: Mon, 18 Sep 2023 17:14:10 +02:00
Committer: Ingo Molnar <mingo@xxxxxxxxxx>
CommitterDate: Thu, 21 Sep 2023 09:35:50 +02:00

x86/percpu: Do not clobber %rsi in percpu_{try_,}cmpxchg{64,128}_op

The fallback alternative uses %rsi register to manually load pointer
to the percpu variable before the call to the emulation function.
This is unoptimal, because the load is hidden from the compiler.

Move the load of %rsi outside inline asm, so the compiler can
reuse the value. The code in slub.o improves from:

55ac: 49 8b 3c 24 mov (%r12),%rdi
55b0: 48 8d 4a 40 lea 0x40(%rdx),%rcx
55b4: 49 8b 1c 07 mov (%r15,%rax,1),%rbx
55b8: 4c 89 f8 mov %r15,%rax
55bb: 48 8d 37 lea (%rdi),%rsi
55be: e8 00 00 00 00 callq 55c3 <...>
55bf: R_X86_64_PLT32 this_cpu_cmpxchg16b_emu-0x4
55c3: 75 a3 jne 5568 <...>
55c5: ...

0000000000000000 <.altinstr_replacement>:
5: 65 48 0f c7 0f cmpxchg16b %gs:(%rdi)

to:

55ac: 49 8b 34 24 mov (%r12),%rsi
55b0: 48 8d 4a 40 lea 0x40(%rdx),%rcx
55b4: 49 8b 1c 07 mov (%r15,%rax,1),%rbx
55b8: 4c 89 f8 mov %r15,%rax
55bb: e8 00 00 00 00 callq 55c0 <...>
55bc: R_X86_64_PLT32 this_cpu_cmpxchg16b_emu-0x4
55c0: 75 a6 jne 5568 <...>
55c2: ...

Where the alternative replacement instruction now uses %rsi:

0000000000000000 <.altinstr_replacement>:
5: 65 48 0f c7 0e cmpxchg16b %gs:(%rsi)

The instruction (effectively a reg-reg move) at 55bb: in the original
assembly is removed. Also, both the CALL and replacement CMPXCHG16B
are 5 bytes long, removing the need for NOPs in the asm code.

Suggested-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Signed-off-by: Uros Bizjak <ubizjak@xxxxxxxxx>
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
Link: https://lore.kernel.org/r/20230918151452.62344-1-ubizjak@xxxxxxxxx
---
arch/x86/include/asm/percpu.h | 28 ++++++++++++++++------------
1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index a87db61..20624b8 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -242,14 +242,15 @@ do { \
old__.var = _oval; \
new__.var = _nval; \
\
- asm qual (ALTERNATIVE("leal %P[var], %%esi; call this_cpu_cmpxchg8b_emu", \
+ asm qual (ALTERNATIVE("call this_cpu_cmpxchg8b_emu", \
"cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
: [var] "+m" (_var), \
"+a" (old__.low), \
"+d" (old__.high) \
: "b" (new__.low), \
- "c" (new__.high) \
- : "memory", "esi"); \
+ "c" (new__.high), \
+ "S" (&(_var)) \
+ : "memory"); \
\
old__.var; \
})
@@ -271,7 +272,7 @@ do { \
old__.var = *_oval; \
new__.var = _nval; \
\
- asm qual (ALTERNATIVE("leal %P[var], %%esi; call this_cpu_cmpxchg8b_emu", \
+ asm qual (ALTERNATIVE("call this_cpu_cmpxchg8b_emu", \
"cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
CC_SET(z) \
: CC_OUT(z) (success), \
@@ -279,8 +280,9 @@ do { \
"+a" (old__.low), \
"+d" (old__.high) \
: "b" (new__.low), \
- "c" (new__.high) \
- : "memory", "esi"); \
+ "c" (new__.high), \
+ "S" (&(_var)) \
+ : "memory"); \
if (unlikely(!success)) \
*_oval = old__.var; \
likely(success); \
@@ -309,14 +311,15 @@ do { \
old__.var = _oval; \
new__.var = _nval; \
\
- asm qual (ALTERNATIVE("leaq %P[var], %%rsi; call this_cpu_cmpxchg16b_emu", \
+ asm qual (ALTERNATIVE("call this_cpu_cmpxchg16b_emu", \
"cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
: [var] "+m" (_var), \
"+a" (old__.low), \
"+d" (old__.high) \
: "b" (new__.low), \
- "c" (new__.high) \
- : "memory", "rsi"); \
+ "c" (new__.high), \
+ "S" (&(_var)) \
+ : "memory"); \
\
old__.var; \
})
@@ -338,7 +341,7 @@ do { \
old__.var = *_oval; \
new__.var = _nval; \
\
- asm qual (ALTERNATIVE("leaq %P[var], %%rsi; call this_cpu_cmpxchg16b_emu", \
+ asm qual (ALTERNATIVE("call this_cpu_cmpxchg16b_emu", \
"cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
CC_SET(z) \
: CC_OUT(z) (success), \
@@ -346,8 +349,9 @@ do { \
"+a" (old__.low), \
"+d" (old__.high) \
: "b" (new__.low), \
- "c" (new__.high) \
- : "memory", "rsi"); \
+ "c" (new__.high), \
+ "S" (&(_var)) \
+ : "memory"); \
if (unlikely(!success)) \
*_oval = old__.var; \
likely(success); \