Re: [PATCH v2 -tip] x86/percpu: Use C for arch_raw_cpu_ptr()

From: Uros Bizjak
Date: Fri Oct 13 2023 - 05:39:20 EST


On Thu, Oct 12, 2023 at 8:01 PM Uros Bizjak <ubizjak@xxxxxxxxx> wrote:
>
> On Thu, Oct 12, 2023 at 7:47 PM Linus Torvalds
> <torvalds@xxxxxxxxxxxxxxxxxxxx> wrote:
> >
> > On Thu, 12 Oct 2023 at 10:10, Linus Torvalds
> > <torvalds@xxxxxxxxxxxxxxxxxxxx> wrote:
> > >
> > > The fix seems to be a simple one-liner, ie just
> > >
> > > - asm(__pcpu_op2_##size(op, __percpu_arg(P[var]), "%[val]") \
> > > + asm(__pcpu_op2_##size(op, __percpu_arg(a[var]), "%[val]") \
> >
> > Nope. That doesn't work at all.
> >
> > It turns out that we're not the only ones that didn't know about the
> > 'a' modifier.
> >
> > clang has also never heard of it in this context, and the above
> > one-liner results in an endless sea of errors, with
> >
> > error: invalid operand in inline asm: 'movq %gs:${1:a}, $0'
> >
> > Looking around, I think it's X86AsmPrinter::PrintAsmOperand() that is
> > supposed to handle these things, and while it does have some handling
> > for 'a', the comment around it says
> >
> > case 'a': // This is an address. Currently only 'i' and 'r' are expected.
> >
> > and I think our use ends up just confusing the heck out of clang. Of
> > course, clang also does this:
> >
> > case 'P': // This is the operand of a call, treat specially.
> > PrintPCRelImm(MI, OpNo, O);
> > return false;
> >
> > so clang *already* generates those 'current' accesses as PCrelative, and I see
> >
> > movq %gs:pcpu_hot(%rip), %r13
> >
> > in the generated code.
> >
> > End result: clang actually generates what we want just using 'P', and
> > the whole "P vs a" is only a gcc thing.
>
> Ugh, this isn't exactly following Clang's claim that "In general,
> Clang is highly compatible with the GCC inline assembly extensions,
> allowing the same set of constraints, modifiers and operands as GCC
> inline assembly."

For added fun I obtained some old clang:

$ clang --version
clang version 11.0.0 (Fedora 11.0.0-3.fc33)

and tried to compile this:

int m;
__seg_gs int n;

void foo (void)
{
asm ("# %a0 %a1" :: "p" (&m), "p" (&n));
asm ("# %P0 %P1" :: "p" (&m), "p" (&n));
}

clang-11:

# m n
# m n

clang-11 -fpie:

# m(%rip) n(%rip)
# m n

clang-11 -m32:

# m n
# m n

gcc:

# m(%rip) n(%rip)
# m n

gcc -fpie:

# m(%rip) n(%rip)
# m n

gcc -m32:

# m n
# m n

Please find attached a patch that should bring some order to this
issue. The patch includes two demonstration sites: the generated code
for mem_encrypt_identity.c does not change, while the change in
percpu.h brings the expected 4kB code size reduction.

Uros.
diff --git a/arch/x86/include/asm/compiler.h b/arch/x86/include/asm/compiler.h
new file mode 100644
index 000000000000..37c9dea50be6
--- /dev/null
+++ b/arch/x86/include/asm/compiler.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_X86_COMPILER_H
+#define __ASM_X86_COMPILER_H
+
+/*
+ * Substitute a memory reference, with the actual
+ * operand treated as the address.
+ */
+
+#ifdef __clang__
+#define __memref(x) "%P" #x
+#else
+#define __memref(x) "%a" #x
+#endif
+
+#endif /* __ASM_X86_COMPILER_H */
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 60ea7755c0fe..92b8d60a3bf5 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -26,6 +26,7 @@

#include <linux/kernel.h>
#include <linux/stringify.h>
+#include <asm/compiler.h>

#ifdef CONFIG_SMP

@@ -71,7 +72,7 @@
#define __my_cpu_ptr(ptr) (__my_cpu_type(*ptr) *)(uintptr_t)(ptr)
#define __my_cpu_var(var) (*__my_cpu_ptr(&var))
#define __percpu_arg(x) __percpu_prefix "%" #x
-#define __force_percpu_arg(x) __force_percpu_prefix "%" #x
+#define __force_percpu_memref(x) __force_percpu_prefix __memref(x)

/*
* Initialized pointers to per-cpu variables needed for the boot
@@ -175,7 +176,7 @@ do { \
#define percpu_stable_op(size, op, _var) \
({ \
__pcpu_type_##size pfo_val__; \
- asm(__pcpu_op2_##size(op, __force_percpu_arg(P[var]), "%[val]") \
+ asm(__pcpu_op2_##size(op, __force_percpu_memref([var]), "%[val]") \
: [val] __pcpu_reg_##size("=", pfo_val__) \
: [var] "p" (&(_var))); \
(typeof(_var))(unsigned long) pfo_val__; \
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index d73aeb16417f..6768f586ab51 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -43,6 +43,7 @@

#include <asm/setup.h>
#include <asm/sections.h>
+#include <asm/compiler.h>
#include <asm/cmdline.h>
#include <asm/coco.h>
#include <asm/sev.h>
@@ -582,13 +583,13 @@ void __init sme_enable(struct boot_params *bp)
* identity mapped, so we must obtain the address to the SME command
* line argument data using rip-relative addressing.
*/
- asm ("lea sme_cmdline_arg(%%rip), %0"
+ asm ("lea " __memref(1) ", %0"
: "=r" (cmdline_arg)
: "p" (sme_cmdline_arg));
- asm ("lea sme_cmdline_on(%%rip), %0"
+ asm ("lea " __memref(1) ", %0"
: "=r" (cmdline_on)
: "p" (sme_cmdline_on));
- asm ("lea sme_cmdline_off(%%rip), %0"
+ asm ("lea " __memref(1) ", %0"
: "=r" (cmdline_off)
: "p" (sme_cmdline_off));