[PATCH 2/2] arm64: tlb: Add boot parameter to disable TLB flush within the same inner shareable domain
From: Takao Indoh
Date: Mon Jun 17 2019 - 10:49:22 EST
From: Takao Indoh <indou.takao@xxxxxxxxxxx>
This patch adds a new boot parameter, 'disable_tlbflush_is', to disable
broadcast TLB flush within the same inner shareable domain for
performance tuning.

Without this parameter, flush_tlb_mm() invalidates TLB entries with
__tlbi(aside1is, asid). That instruction makes every CPU within the
same inner shareable domain check whether it holds TLB entries for the
ASID, which causes performance noise, especially in large-scale HPC
environments with more than a thousand nodes on a low-latency
interconnect.

When the new parameter is specified, TLB entries are instead
invalidated with the local __tlbi(aside1, asid) instruction, issued
only on the CPUs in mm_cpumask(mm). The flush therefore runs on the
minimal set of CPUs and the performance problem does not occur.
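In condensed form, the new flush_tlb_mm() dispatch looks as follows;
this is a sketch of the tlbflush.c hunk below, not additional code, and
the same pattern is applied to flush_tlb_page_nosync() and
__flush_tlb_range():

	void flush_tlb_mm(struct mm_struct *mm)
	{
		if (disable_tlbflush_is)
			/* IPI a local ASID flush to the CPUs that ran this mm */
			on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_mm,
					 mm, true);
		else
			__flush_tlb_mm(mm);	/* broadcast TLBI ASIDE1IS */
	}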
Signed-off-by: QI Fuli <qi.fuli@xxxxxxxxxxx>
Signed-off-by: Takao Indoh <indou.takao@xxxxxxxxxxx>
---
.../admin-guide/kernel-parameters.txt | 4 +
arch/arm64/include/asm/tlbflush.h | 61 ++-----
arch/arm64/kernel/Makefile | 2 +-
arch/arm64/kernel/tlbflush.c | 162 ++++++++++++++++++
4 files changed, 179 insertions(+), 50 deletions(-)
create mode 100644 arch/arm64/kernel/tlbflush.c
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 138f6664b2e2..a693eea34e48 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -848,6 +848,10 @@
disable= [IPV6]
See Documentation/networking/ipv6.txt.
+ disable_tlbflush_is
+ [ARM64] Disable using broadcast TLB instructions to
+ flush all PEs within the same inner shareable domain.
+
hardened_usercopy=
[KNL] Under CONFIG_HARDENED_USERCOPY, whether
hardening is enabled for this boot. Hardened
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index dff8f9ea5754..ba2b3fd0b63c 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -139,6 +139,13 @@
* on top of these routines, since that is our interface to the mmu_gather
* API as used by munmap() and friends.
*/
+
+void flush_tlb_mm(struct mm_struct *mm);
+void flush_tlb_page_nosync(struct vm_area_struct *vma,
+ unsigned long uaddr);
+void __flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end, unsigned long stride, bool last_level);
+
static inline void local_flush_tlb_all(void)
{
dsb(nshst);
@@ -155,24 +162,14 @@ static inline void flush_tlb_all(void)
isb();
}
-static inline void flush_tlb_mm(struct mm_struct *mm)
+static inline void local_flush_tlb_mm(struct mm_struct *mm)
{
unsigned long asid = __TLBI_VADDR(0, ASID(mm));
- dsb(ishst);
- __tlbi(aside1is, asid);
- __tlbi_user(aside1is, asid);
- dsb(ish);
-}
-
-static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
- unsigned long uaddr)
-{
- unsigned long addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm));
-
- dsb(ishst);
- __tlbi(vale1is, addr);
- __tlbi_user(vale1is, addr);
+ dsb(nshst);
+ __tlbi(aside1, asid);
+ __tlbi_user(aside1, asid);
+ dsb(nsh);
}
static inline void flush_tlb_page(struct vm_area_struct *vma,
@@ -188,40 +185,6 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
*/
#define MAX_TLBI_OPS PTRS_PER_PTE
-static inline void __flush_tlb_range(struct vm_area_struct *vma,
- unsigned long start, unsigned long end,
- unsigned long stride, bool last_level)
-{
- unsigned long asid = ASID(vma->vm_mm);
- unsigned long addr;
-
- start = round_down(start, stride);
- end = round_up(end, stride);
-
- if ((end - start) >= (MAX_TLBI_OPS * stride)) {
- flush_tlb_mm(vma->vm_mm);
- return;
- }
-
- /* Convert the stride into units of 4k */
- stride >>= 12;
-
- start = __TLBI_VADDR(start, asid);
- end = __TLBI_VADDR(end, asid);
-
- dsb(ishst);
- for (addr = start; addr < end; addr += stride) {
- if (last_level) {
- __tlbi(vale1is, addr);
- __tlbi_user(vale1is, addr);
- } else {
- __tlbi(vae1is, addr);
- __tlbi_user(vae1is, addr);
- }
- }
- dsb(ish);
-}
-
static inline void flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 9e7dcb2c31c7..266c9a57b081 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -19,7 +19,7 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
return_address.o cpuinfo.o cpu_errata.o \
cpufeature.o alternative.o cacheinfo.o \
smp.o smp_spin_table.o topology.o smccc-call.o \
- syscall.o
+ syscall.o tlbflush.o
extra-$(CONFIG_EFI) := efi-entry.o
diff --git a/arch/arm64/kernel/tlbflush.c b/arch/arm64/kernel/tlbflush.c
new file mode 100644
index 000000000000..52c9a237759a
--- /dev/null
+++ b/arch/arm64/kernel/tlbflush.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2019 FUJITSU LIMITED
+
+#include <linux/smp.h>
+#include <asm/tlbflush.h>
+
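+/* Arguments handed to remote CPUs for an IPI-based range flush */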
+struct tlb_args {
+ struct vm_area_struct *ta_vma;
+ unsigned long ta_start;
+ unsigned long ta_end;
+ unsigned long ta_stride;
+ bool ta_last_level;
+};
+
+int disable_tlbflush_is;
+
+static int __init disable_tlbflush_is_setup(char *str)
+{
+ disable_tlbflush_is = 1;
+
+ return 1;	/* parameter consumed; do not pass it on to init */
+}
+__setup("disable_tlbflush_is", disable_tlbflush_is_setup);
+
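+/* Default path: one broadcast TLBI covers the whole inner shareable domain */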
+static inline void __flush_tlb_mm(struct mm_struct *mm)
+{
+ unsigned long asid = __TLBI_VADDR(0, ASID(mm));
+
+ dsb(ishst);
+ __tlbi(aside1is, asid);
+ __tlbi_user(aside1is, asid);
+ dsb(ish);
+}
+
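+/* IPI handler: flush this CPU's TLB entries for the given mm */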
+static inline void ipi_flush_tlb_mm(void *arg)
+{
+ struct mm_struct *mm = arg;
+
+ local_flush_tlb_mm(mm);
+}
+
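+/* Flush an mm via IPIs to mm_cpumask(mm), or via a single broadcast TLBI */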
+void flush_tlb_mm(struct mm_struct *mm)
+{
+ if (disable_tlbflush_is)
+ on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_mm,
+ mm, true);
+ else
+ __flush_tlb_mm(mm);
+}
+
+static inline void __flush_tlb_page_nosync(unsigned long addr)
+{
+ dsb(ishst);
+ __tlbi(vale1is, addr);
+ __tlbi_user(vale1is, addr);
+}
+
+static inline void __local_flush_tlb_page_nosync(unsigned long addr)
+{
+ dsb(nshst);
+ __tlbi(vale1, addr);
+ __tlbi_user(vale1, addr);
+ dsb(nsh);
+}
+
+static inline void ipi_flush_tlb_page_nosync(void *arg)
+{
+ unsigned long addr = *(unsigned long *)arg;
+
+ __local_flush_tlb_page_nosync(addr);
+}
+
+void flush_tlb_page_nosync(struct vm_area_struct *vma, unsigned long uaddr)
+{
+ unsigned long addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm));
+
+ if (disable_tlbflush_is)
+ on_each_cpu_mask(mm_cpumask(vma->vm_mm),
+ ipi_flush_tlb_page_nosync, &addr, true);
+ else
+ __flush_tlb_page_nosync(addr);
+}
+
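+/* Broadcast (inner shareable) version of the range flush */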
+static inline void ___flush_tlb_range(unsigned long start, unsigned long end,
+ unsigned long stride, bool last_level)
+{
+ unsigned long addr;
+
+ dsb(ishst);
+ for (addr = start; addr < end; addr += stride) {
+ if (last_level) {
+ __tlbi(vale1is, addr);
+ __tlbi_user(vale1is, addr);
+ } else {
+ __tlbi(vae1is, addr);
+ __tlbi_user(vae1is, addr);
+ }
+ }
+ dsb(ish);
+}
+
+static inline void __local_flush_tlb_range(unsigned long addr, bool last_level)
+{
+ dsb(nshst);
+ if (last_level) {
+ __tlbi(vale1, addr);
+ __tlbi_user(vale1, addr);
+ } else {
+ __tlbi(vae1, addr);
+ __tlbi_user(vae1, addr);
+ }
+ dsb(nsh);
+}
+
+static inline void ipi_flush_tlb_range(void *arg)
+{
+ struct tlb_args *ta = (struct tlb_args *)arg;
+ unsigned long addr;
+
+ for (addr = ta->ta_start; addr < ta->ta_end; addr += ta->ta_stride)
+ __local_flush_tlb_range(addr, ta->ta_last_level);
+}
+
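+/* Range flush; degenerates to a full mm flush beyond MAX_TLBI_OPS entries */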
+void __flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end, unsigned long stride, bool last_level)
+{
+ unsigned long asid = ASID(vma->vm_mm);
+
+ start = round_down(start, stride);
+ end = round_up(end, stride);
+
+ if ((end - start) >= (MAX_TLBI_OPS * stride)) {
+ flush_tlb_mm(vma->vm_mm);
+ return;
+ }
+
+ /* Convert the stride into units of 4k */
+ stride >>= 12;
+
+ start = __TLBI_VADDR(start, asid);
+ end = __TLBI_VADDR(end, asid);
+
+ if (disable_tlbflush_is) {
+ struct tlb_args ta = {
+ .ta_start = start,
+ .ta_end = end,
+ .ta_stride = stride,
+ .ta_last_level = last_level,
+ };
+
+ on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range,
+ &ta, true);
+ } else {
+ ___flush_tlb_range(start, end, stride, last_level);
+ }
+}
--
2.20.1