Re: [PATCH v4 2/9] mm/mmu_gather: Invalidate TLB correctly on batch allocation failure and flush

From: Michael Ellerman
Date: Sat Jan 18 2020 - 06:04:22 EST


"Aneesh Kumar K.V" <aneesh.kumar@xxxxxxxxxxxxx> writes:
> From: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
>
> Architectures that have hardware walkers of the Linux page tables must
> flush the TLB on mmu_gather batch allocation failure and on batch flush.
> Some architectures, like POWER, support multiple translation modes (hash
> and radix), and on POWER only the radix mode needs this TLBI: under hash
> translation there are no hardware walkers of the Linux page tables, so
> the kernel wants to avoid the extra flush, whereas under radix the
> hardware does walk the Linux page tables, and the kernel must invalidate
> the page-walk cache before the page table pages are freed.
>
> More details in
> commit: d86564a2f085 ("mm/tlb, x86/mm: Support invalidating TLB caches for RCU_TABLE_FREE")
>
> The sparc changes keep the old behaviour now that
> HAVE_RCU_TABLE_NO_INVALIDATE is removed. The default for
> tlb_needs_table_invalidate() is to always force an invalidate, but sparc
> can safely skip the table invalidate, so it defines
> tlb_needs_table_invalidate() to false.
>
> Cc: <stable@xxxxxxxxxxxxxxx>
> Fixes: a46cc7a90fd8 ("powerpc/mm/radix: Improve TLB/PWC flushes")

Which went into v4.14, so I like to use:

Cc: stable@xxxxxxxxxxxxxxx # v4.14+


> Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@xxxxxxxxxxxxx>
> ---
> arch/Kconfig | 3 ---
> arch/powerpc/Kconfig | 1 -
> arch/powerpc/include/asm/tlb.h | 11 +++++++++++
> arch/sparc/Kconfig | 1 -
> arch/sparc/include/asm/tlb_64.h | 9 +++++++++
> include/asm-generic/tlb.h | 22 +++++++++++++++-------
> mm/mmu_gather.c | 16 ++++++++--------
> 7 files changed, 43 insertions(+), 20 deletions(-)
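
To spell out the shape of the interface for anyone skimming: this is a
compile-time override with a generic default, not a runtime hook. Below is a
minimal standalone userspace sketch of the pattern; the names mirror the
kernel's, but tlb_flush_mmu_tlbonly() here is just a printf stub, so treat it
as an illustration of the mechanism rather than the real code:

	/* tlbi_pattern.c - userspace sketch of the override pattern. */
	#include <stdbool.h>
	#include <stdio.h>

	/*
	 * An architecture whose hardware never walks the Linux page tables
	 * overrides the macro before including the generic header, e.g.
	 * sparc64 defines it to (false), powerpc to radix_enabled().
	 * Uncomment the next line to simulate such an override:
	 */
	/* #define tlb_needs_table_invalidate() (false) */

	/* Generic default, as in asm-generic/tlb.h: always invalidate. */
	#ifndef tlb_needs_table_invalidate
	#define tlb_needs_table_invalidate() (true)
	#endif

	/* Stub standing in for the kernel's tlb_flush_mmu_tlbonly(). */
	static void tlb_flush_mmu_tlbonly(void)
	{
		puts("TLBI: invalidating page-walk caches");
	}

	static void tlb_table_invalidate(void)
	{
		if (tlb_needs_table_invalidate()) {
			/*
			 * Invalidate page-table caches used by hardware
			 * walkers; the RCU grace period still covers
			 * software walkers.
			 */
			tlb_flush_mmu_tlbonly();
		}
	}

	int main(void)
	{
		tlb_table_invalidate();
		return 0;
	}

With the generic default the invalidate always runs; an architecture that
defines the macro to a constant false lets the compiler drop the call
entirely, which is exactly the hash-vs-radix distinction above.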

Acked-by: Michael Ellerman <mpe@xxxxxxxxxxxxxx> (powerpc)

cheers

> diff --git a/arch/Kconfig b/arch/Kconfig
> index 48b5e103bdb0..208aad121630 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -396,9 +396,6 @@ config HAVE_ARCH_JUMP_LABEL_RELATIVE
> config HAVE_RCU_TABLE_FREE
> bool
>
> -config HAVE_RCU_TABLE_NO_INVALIDATE
> - bool
> -
> config HAVE_MMU_GATHER_PAGE_SIZE
> bool
>
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index 04240205f38c..f9970f87612e 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -223,7 +223,6 @@ config PPC
> select HAVE_PERF_REGS
> select HAVE_PERF_USER_STACK_DUMP
> select HAVE_RCU_TABLE_FREE
> - select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE
> select HAVE_MMU_GATHER_PAGE_SIZE
> select HAVE_REGS_AND_STACK_ACCESS_API
> select HAVE_RELIABLE_STACKTRACE if PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN
> diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h
> index b2c0be93929d..7f3a8b902325 100644
> --- a/arch/powerpc/include/asm/tlb.h
> +++ b/arch/powerpc/include/asm/tlb.h
> @@ -26,6 +26,17 @@
>
> #define tlb_flush tlb_flush
> extern void tlb_flush(struct mmu_gather *tlb);
> +/*
> + * book3s:
> + * Hash does not use the linux page-tables, so we can avoid
> + * the TLB invalidate for page-table freeing; Radix, OTOH, does use
> + * the page-tables and needs the TLBI.
> + *
> + * nohash:
> + * We still do TLB invalidate in the __pte_free_tlb routine before we
> + * add the page table pages to mmu gather table batch.
> + */
> +#define tlb_needs_table_invalidate() radix_enabled()
>
> /* Get the generic bits... */
> #include <asm-generic/tlb.h>
> diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
> index eb24cb1afc11..18e9fb6fcf1b 100644
> --- a/arch/sparc/Kconfig
> +++ b/arch/sparc/Kconfig
> @@ -65,7 +65,6 @@ config SPARC64
> select HAVE_KRETPROBES
> select HAVE_KPROBES
> select HAVE_RCU_TABLE_FREE if SMP
> - select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE
> select HAVE_MEMBLOCK_NODE_MAP
> select HAVE_ARCH_TRANSPARENT_HUGEPAGE
> select HAVE_DYNAMIC_FTRACE
> diff --git a/arch/sparc/include/asm/tlb_64.h b/arch/sparc/include/asm/tlb_64.h
> index a2f3fa61ee36..8cb8f3833239 100644
> --- a/arch/sparc/include/asm/tlb_64.h
> +++ b/arch/sparc/include/asm/tlb_64.h
> @@ -28,6 +28,15 @@ void flush_tlb_pending(void);
> #define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
> #define tlb_flush(tlb) flush_tlb_pending()
>
> +/*
> + * SPARC64's hardware TLB fill does not use the Linux page-tables
> + * and therefore we don't need a TLBI when freeing page-table pages.
> + */
> +
> +#ifdef CONFIG_HAVE_RCU_TABLE_FREE
> +#define tlb_needs_table_invalidate() (false)
> +#endif
> +
> #include <asm-generic/tlb.h>
>
> #endif /* _SPARC64_TLB_H */
> diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
> index 2b10036fefd0..9e22ac369d1d 100644
> --- a/include/asm-generic/tlb.h
> +++ b/include/asm-generic/tlb.h
> @@ -137,13 +137,6 @@
> * When used, an architecture is expected to provide __tlb_remove_table()
> * which does the actual freeing of these pages.
> *
> - * HAVE_RCU_TABLE_NO_INVALIDATE
> - *
> - * This makes HAVE_RCU_TABLE_FREE avoid calling tlb_flush_mmu_tlbonly() before
> - * freeing the page-table pages. This can be avoided if you use
> - * HAVE_RCU_TABLE_FREE and your architecture does _NOT_ use the Linux
> - * page-tables natively.
> - *
> * MMU_GATHER_NO_RANGE
> *
> * Use this if your architecture lacks an efficient flush_tlb_range().
> @@ -189,8 +182,23 @@ struct mmu_table_batch {
>
> extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
>
> +/*
> + * This allows an architecture whose hardware does not walk the linux
> + * page-tables to skip the TLBI when freeing page tables.
> + */
> +#ifndef tlb_needs_table_invalidate
> +#define tlb_needs_table_invalidate() (true)
> +#endif
> +
> +#else
> +
> +#ifdef tlb_needs_table_invalidate
> +#error tlb_needs_table_invalidate() requires HAVE_RCU_TABLE_FREE
> #endif
>
> +#endif /* CONFIG_HAVE_RCU_TABLE_FREE */
> +
> +
> #ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER
> /*
> * If we can't allocate a page to make a big batch of page pointers
> diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
> index 7d70e5c78f97..7c1b8f67af7b 100644
> --- a/mm/mmu_gather.c
> +++ b/mm/mmu_gather.c
> @@ -102,14 +102,14 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_
> */
> static inline void tlb_table_invalidate(struct mmu_gather *tlb)
> {
> -#ifndef CONFIG_HAVE_RCU_TABLE_NO_INVALIDATE
> - /*
> - * Invalidate page-table caches used by hardware walkers. Then we still
> - * need to RCU-sched wait while freeing the pages because software
> - * walkers can still be in-flight.
> - */
> - tlb_flush_mmu_tlbonly(tlb);
> -#endif
> + if (tlb_needs_table_invalidate()) {
> + /*
> + * Invalidate page-table caches used by hardware walkers. Then
> + * we still need to RCU-sched wait while freeing the pages
> + * because software walkers can still be in-flight.
> + */
> + tlb_flush_mmu_tlbonly(tlb);
> + }
> }
>
> static void tlb_remove_table_smp_sync(void *arg)
> --
> 2.24.1