Re: [GIT PULL] arm64 fixes for -rc7
From: Marc Zyngier
Date: Tue Mar 18 2025 - 07:47:55 EST
On Mon, 17 Mar 2025 16:00:35 +0000,
Will Deacon <will@xxxxxxxxxx> wrote:
>
> Hi Linus,
>
> On Fri, Mar 14, 2025 at 10:34:57AM -1000, Linus Torvalds wrote:
> > On Fri, 14 Mar 2025 at 06:05, Will Deacon <will@xxxxxxxxxx> wrote:
> > >
> > > Summary in the tag, but the main one is a horrible macro fix for our
> > > TLB flushing code which resulted in over-invalidation on the MMU
> > > notifier path.
> >
> > From a quick look, that macro is still quite broken. Maybe not in ways
> > that matter, but still...
> >
> > In particular, the 'stride' argument is used multiple times, and
> > without parentheses.
> >
> > The 'lpa2' argument is also used multiple times, and the input to that
> > is typically something like kvm_lpa2_is_enabled(), so I think it
> > potentially generates lots of pointless duplicate code with that
> > BUG_ON() in system_supports_lpa2() -> cpus_have_final_cap().
> >
> > Maybe the compiler figures it out. But that macro is bad, bad, bad.
> > When it looks like a function, it should act like a function, and not
> > evaluate its arguments multiple times.
> >
> > The most immediate bug may have been fixed, but not the actual real
> > horror of that thing.
>
> Yes, the minimal fix for -rc7 avoids explicitly mutating the macro
> arguments but we still have the multiple-evaluation problem you point
> out above.
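
Just to spell that out with a standalone sketch (made-up names, not the
kernel code): passing something like kvm_lpa2_is_enabled() to a macro
that uses its parameter twice boils down to this:

#include <stdio.h>

static int evaluations;

/* Stand-in for kvm_lpa2_is_enabled(): count how often it runs */
static int fake_lpa2_is_enabled(void)
{
	evaluations++;
	return 1;
}

/* 'lpa2' appears twice in the body, so the argument is expanded twice */
#define FLUSH_SKETCH(lpa2)					\
do {								\
	int shift = (lpa2) ? 16 : 12;	/* evaluation #1 */	\
	if ((lpa2) && shift == 16)	/* evaluation #2 */	\
		shift = 0;					\
	(void)shift;						\
} while (0)

int main(void)
{
	FLUSH_SKETCH(fake_lpa2_is_enabled());
	printf("argument evaluated %d times\n", evaluations); /* prints 2 */
	return 0;
}

The compiler may well fold the duplicates when it can see through the
helper, but the expansion is there all the same.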
>
> Ideally, this function would be rewritten as a 'static inline' but it
> was moved from C code into a macro as part of 360839027a6e ("arm64: tlb:
> Refactor the core flush algorithm of __flush_tlb_range") because we need
> to propagate the 'op' argument down to the low-level asm where it's
> stringified as part of the instruction mnemonic.
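
For context, the stringification in question is roughly of this shape
(a deliberately simplified sketch, not the exact __tlbi() definition,
which also copes with zero-operand forms and errata alternatives):

	#define __tlbi_sketch(op, addr)					\
		asm volatile("tlbi " #op ", %0" : : "r" (addr) : "memory")

so 'op' needs to be a token the preprocessor can paste into the
mnemonic, not an ordinary C value.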
>
> I'll have a crack at reworking things to take a 'const char *' instead,
> but it won't be for 6.14 as it'll be reasonably invasive.
I had a go at the 'const char *' approach, but couldn't get it to work
reliably without the changes becoming very invasive.
I ended up with a slightly bigger hammer (see below) that survived
booting on a test box and running a couple of VMs. I wouldn't trust it
with anything more important than that though.
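
The idea is to generate one trivial inline wrapper for each of the TLBI
operations we allow for range invalidation (so the op token still gets
stringified where it has to be), and to hand those wrappers to a single
__always_inline helper that takes the remaining arguments as ordinary
function parameters, i.e. evaluated exactly once. Taking vale1is as an
example op,

	__flush_tlb_range_op(vale1is, start, pages, stride,
			     asid, tlb_level, true, lpa2)

now expands to

	__flush_tlb_range_by_op(tlbi_level_vale1is, tlbi_user_level_vale1is,
				tlbi_rvale1is, tlbi_user_rvale1is,
				start, pages, stride, asid,
				tlb_level, true, lpa2)

so whatever expression the caller passes for 'lpa2' (or 'stride') is
only evaluated once, when the helper's arguments are set up.
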
M.
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 8104aee4f9a08..0ff635cf8abf5 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -393,45 +393,93 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
* operations can only span an even number of pages. We save this for last to
* ensure 64KB start alignment is maintained for the LPA2 case.
*/
+typedef void (*tlbi_level_fn_t)(u64, int);
+typedef void (*tlbi_fn_t)(u64);
+
+#define __TLBI_LEVEL_FN(t, s) \
+static inline void tlbi_level_##t##s(u64 addr, int level) \
+{ \
+ __tlbi_level(t##s, addr, level); \
+} \
+static inline void tlbi_user_level_##t##s(u64 addr, int level) \
+{ \
+ __tlbi_user_level(t##s, addr, level); \
+}
+
+#define __TLBI_FN(t, s) \
+static inline void tlbi_##t##s(u64 addr) \
+{ \
+ __tlbi(t##s, addr); \
+} \
+static inline void tlbi_user_##t##s(u64 addr) \
+{ \
+ __tlbi_user(t##s, addr); \
+}
+
+#define TLBI_FNS(t) \
+ __TLBI_FN(t, ) \
+ __TLBI_FN(t, is) \
+ __TLBI_FN(r##t, ) \
+ __TLBI_FN(r##t, is) \
+ __TLBI_LEVEL_FN(t, ) \
+ __TLBI_LEVEL_FN(t, is) \
+ __TLBI_LEVEL_FN(r##t, ) \
+ __TLBI_LEVEL_FN(r##t, is)
+
+/* These are the TLBI instructions we allow for range operation */
+TLBI_FNS(ipas2e1)
+TLBI_FNS(vae1)
+TLBI_FNS(vale1)
+TLBI_FNS(vaale1)
+
+static __always_inline
+void __flush_tlb_range_by_op(tlbi_level_fn_t il, tlbi_level_fn_t iul,
+ tlbi_fn_t ri, tlbi_fn_t riu,
+ u64 start, u64 pages, int stride,
+ u16 asid, int tlb_level,
+ bool tlbi_user, bool lpa2)
+{
+ int num = 0;
+ int scale = 3;
+ int shift = lpa2 ? 16 : PAGE_SHIFT;
+ unsigned long addr;
+
+ while (pages > 0) {
+ if (!system_supports_tlb_range() ||
+ pages == 1 ||
+ (lpa2 && start != ALIGN(start, SZ_64K))) {
+ addr = __TLBI_VADDR(start, asid);
+ il(addr, tlb_level);
+ if (tlbi_user)
+ iul(addr, tlb_level);
+ start += stride;
+ pages -= stride >> PAGE_SHIFT;
+ continue;
+ }
+
+ num = __TLBI_RANGE_NUM(pages, scale);
+ if (num >= 0) {
+ addr = __TLBI_VADDR_RANGE(start >> shift, asid,
+ scale, num, tlb_level);
+ ri(addr);
+ if (tlbi_user)
+ riu(addr);
+ start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT;
+ pages -= __TLBI_RANGE_PAGES(num, scale);
+ }
+ scale--;
+ }
+}
+
#define __flush_tlb_range_op(op, start, pages, stride, \
- asid, tlb_level, tlbi_user, lpa2) \
-do { \
- typeof(start) __flush_start = start; \
- typeof(pages) __flush_pages = pages; \
- int num = 0; \
- int scale = 3; \
- int shift = lpa2 ? 16 : PAGE_SHIFT; \
- unsigned long addr; \
- \
- while (__flush_pages > 0) { \
- if (!system_supports_tlb_range() || \
- __flush_pages == 1 || \
- (lpa2 && __flush_start != ALIGN(__flush_start, SZ_64K))) { \
- addr = __TLBI_VADDR(__flush_start, asid); \
- __tlbi_level(op, addr, tlb_level); \
- if (tlbi_user) \
- __tlbi_user_level(op, addr, tlb_level); \
- __flush_start += stride; \
- __flush_pages -= stride >> PAGE_SHIFT; \
- continue; \
- } \
- \
- num = __TLBI_RANGE_NUM(__flush_pages, scale); \
- if (num >= 0) { \
- addr = __TLBI_VADDR_RANGE(__flush_start >> shift, asid, \
- scale, num, tlb_level); \
- __tlbi(r##op, addr); \
- if (tlbi_user) \
- __tlbi_user(r##op, addr); \
- __flush_start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \
- __flush_pages -= __TLBI_RANGE_PAGES(num, scale);\
- } \
- scale--; \
- } \
-} while (0)
+ asid, tlb_level, tlbi_user, lpa2) \
+ __flush_tlb_range_by_op(tlbi_level_##op, tlbi_user_level_##op, \
+ tlbi_r##op, tlbi_user_r##op, \
+ start, pages, stride, asid, \
+ tlb_level, tlbi_user, lpa2)
#define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
- __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false, kvm_lpa2_is_enabled());
+ __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false, kvm_lpa2_is_enabled())
static inline bool __flush_tlb_range_limit_excess(unsigned long start,
unsigned long end, unsigned long pages, unsigned long stride)
--
Without deviation from the norm, progress is not possible.