Re: [PATCH] arm64: tlb: fix ARM64_TLB_RANGE with LLVM's integrated assembler

From: Sami Tolvanen
Date: Thu Aug 06 2020 - 15:17:37 EST


On Thu, Aug 06, 2020 at 01:01:09PM +0100, Catalin Marinas wrote:
> On Wed, Aug 05, 2020 at 11:19:20AM -0700, Sami Tolvanen wrote:
> > diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
> > index d493174415db..66c2aab5e9cb 100644
> > --- a/arch/arm64/include/asm/tlbflush.h
> > +++ b/arch/arm64/include/asm/tlbflush.h
> > @@ -16,6 +16,16 @@
> > #include <asm/cputype.h>
> > #include <asm/mmu.h>
> >
> > +/*
> > + * Enable ARMv8.4-TLBI instructions with ARM64_TLB_RANGE. Note that binutils
> > + * doesn't support .arch_extension tlb-rmi, so use .arch armv8.4-a instead.
> > + */
> > +#ifdef CONFIG_ARM64_TLB_RANGE
> > +#define __TLBI_PREAMBLE ".arch armv8.4-a\n"
> > +#else
> > +#define __TLBI_PREAMBLE
> > +#endif
> > +
> > /*
> > * Raw TLBI operations.
> > *
> > @@ -28,14 +38,16 @@
> > * not. The macros handles invoking the asm with or without the
> > * register argument as appropriate.
> > */
> > -#define __TLBI_0(op, arg) asm ("tlbi " #op "\n" \
> > +#define __TLBI_0(op, arg) asm (__TLBI_PREAMBLE \
> > + "tlbi " #op "\n" \
> > ALTERNATIVE("nop\n nop", \
> > "dsb ish\n tlbi " #op, \
> > ARM64_WORKAROUND_REPEAT_TLBI, \
> > CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \
> > : : )
> >
> > -#define __TLBI_1(op, arg) asm ("tlbi " #op ", %0\n" \
> > +#define __TLBI_1(op, arg) asm (__TLBI_PREAMBLE \
> > + "tlbi " #op ", %0\n" \
> > ALTERNATIVE("nop\n nop", \
> > "dsb ish\n tlbi " #op ", %0", \
> > ARM64_WORKAROUND_REPEAT_TLBI, \
>
> A potential problem here is that for gas (not sure about the integrated
> assembler), .arch overrides any other .arch. So if we end up with two
> preambles included in the same generated .S files in the future, it will
> lead to some random behaviour.
>
> Does the LLVM integrated assembler have the same behaviour on .arch
> overriding a prior .arch?

I would assume so, but each inline assembly block is independent in
LLVM, so unless there are .arch changes within the block, that shouldn't
be an issue for the integrated assembler.

> Maybe a better solution is for all inline asm on arm64 to have a
> standard preamble which is the maximum supported architecture version.
> We can add individual .arch_extension as those are not overriding.

Sure, that works. How would you feel about something like this, so we can
keep the preamble in sync with future -Wa,-march changes? I'm not sure if
asm/compiler.h is the correct place for the definition though.

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 55bc8546d9c7..0dd07059beaa 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -82,8 +82,8 @@ endif
# compiler to generate them and consequently to break the single image contract
# we pass it only to the assembler. This option is utilized only in case of non
# integrated assemblers.
-ifneq ($(CONFIG_AS_HAS_ARMV8_4), y)
-branch-prot-flags-$(CONFIG_AS_HAS_PAC) += -Wa,-march=armv8.3-a
+ifeq ($(CONFIG_AS_HAS_PAC), y)
+asm-arch := armv8.3-a
endif
endif

@@ -91,7 +91,12 @@ KBUILD_CFLAGS += $(branch-prot-flags-y)

ifeq ($(CONFIG_AS_HAS_ARMV8_4), y)
# make sure to pass the newest target architecture to -march.
-KBUILD_CFLAGS += -Wa,-march=armv8.4-a
+asm-arch := armv8.4-a
+endif
+
+ifdef asm-arch
+KBUILD_CFLAGS += -Wa,-march=$(asm-arch) \
+ -DARM64_ASM_ARCH='"$(asm-arch)"'
endif

ifeq ($(CONFIG_SHADOW_CALL_STACK), y)
diff --git a/arch/arm64/include/asm/compiler.h b/arch/arm64/include/asm/compiler.h
index 51a7ce87cdfe..6fb2e6bcc392 100644
--- a/arch/arm64/include/asm/compiler.h
+++ b/arch/arm64/include/asm/compiler.h
@@ -2,6 +2,12 @@
#ifndef __ASM_COMPILER_H
#define __ASM_COMPILER_H

+#ifdef ARM64_ASM_ARCH
+#define ARM64_ASM_PREAMBLE ".arch " ARM64_ASM_ARCH "\n"
+#else
+#define ARM64_ASM_PREAMBLE
+#endif
+
/*
* The EL0/EL1 pointer bits used by a pointer authentication code.
* This is dependent on TBI0/TBI1 being enabled, or bits 63:56 would also apply.
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index d493174415db..cc3f5a33ff9c 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -28,14 +28,16 @@
* not. The macros handles invoking the asm with or without the
* register argument as appropriate.
*/
-#define __TLBI_0(op, arg) asm ("tlbi " #op "\n" \
+#define __TLBI_0(op, arg) asm (ARM64_ASM_PREAMBLE \
+ "tlbi " #op "\n" \
ALTERNATIVE("nop\n nop", \
"dsb ish\n tlbi " #op, \
ARM64_WORKAROUND_REPEAT_TLBI, \
CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \
: : )

-#define __TLBI_1(op, arg) asm ("tlbi " #op ", %0\n" \
+#define __TLBI_1(op, arg) asm (ARM64_ASM_PREAMBLE \
+ "tlbi " #op ", %0\n" \
ALTERNATIVE("nop\n nop", \
"dsb ish\n tlbi " #op ", %0", \
ARM64_WORKAROUND_REPEAT_TLBI, \

Sami