[PATCH v15 04/11] x86/mm: Use INVLPGB for kernel TLB flushes

From: Borislav Petkov
Date: Tue Mar 04 2025 - 09:00:22 EST


From: Rik van Riel <riel@xxxxxxxxxxx>

Use broadcast TLB invalidation for kernel addresses when the CPU
supports it, which removes the need to send IPIs for kernel TLB
flushes.

[ bp: Integrate dhansen's comment additions, merge the
  flush_tlb_all() change into this one too. ]

Signed-off-by: Rik van Riel <riel@xxxxxxxxxxx>
Signed-off-by: Borislav Petkov (AMD) <bp@xxxxxxxxx>
Link: https://lore.kernel.org/r/20250226030129.530345-5-riel@xxxxxxxxxxx
---
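For readers new to the instruction pair: INVLPGB broadcasts a TLB
invalidation to every CPU in the system, and TLBSYNC waits until the
broadcasts issued so far have completed everywhere, so the fast path
needs neither IPIs nor per-CPU flush handlers. Below is a minimal
userspace sketch of the dispatch pattern the patch adds; the helper
names mirror the kernel ones introduced earlier in this series, but
they are stubbed out here so the control flow compiles and runs on
its own:

#include <stdbool.h>
#include <stdio.h>

/* Stands in for cpu_feature_enabled(X86_FEATURE_INVLPGB). */
static bool has_invlpgb = true;

/* Stub: the real helper issues one broadcast INVLPGB + TLBSYNC. */
static void invlpgb_flush_all(void)
{
	printf("INVLPGB: flush all TLB entries on every CPU\n");
}

/* Stub: the real fallback is on_each_cpu(do_flush_tlb_all, NULL, 1). */
static void ipi_flush_all(void)
{
	printf("IPI: interrupt every CPU so it flushes its own TLB\n");
}

static void flush_tlb_all(void)
{
	/* First try (faster) hardware-assisted TLB invalidation. */
	if (has_invlpgb)
		invlpgb_flush_all();
	else
		/* Fall back to the IPI-based invalidation. */
		ipi_flush_all();
}

int main(void)
{
	flush_tlb_all();
	return 0;
}

kernel_tlb_flush_all() and kernel_tlb_flush_range() in the patch
below follow the same shape: feature check first, IPI fallback second.
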
arch/x86/mm/tlb.c | 48 +++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 44 insertions(+), 4 deletions(-)

diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index dbcb5c968ff9..8cd084bc3d98 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1064,7 +1064,6 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
 	mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
 }
 
-
 static void do_flush_tlb_all(void *info)
 {
 	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
@@ -1074,7 +1073,32 @@ static void do_flush_tlb_all(void *info)
 void flush_tlb_all(void)
 {
 	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
-	on_each_cpu(do_flush_tlb_all, NULL, 1);
+
+	/* First try (faster) hardware-assisted TLB invalidation. */
+	if (cpu_feature_enabled(X86_FEATURE_INVLPGB))
+		invlpgb_flush_all();
+	else
+		/* Fall back to the IPI-based invalidation. */
+		on_each_cpu(do_flush_tlb_all, NULL, 1);
+}
+
+/* Flush an arbitrarily large range of memory with INVLPGB. */
+static void invlpgb_kernel_range_flush(struct flush_tlb_info *info)
+{
+	unsigned long addr, nr;
+
+	for (addr = info->start; addr < info->end; addr += nr << PAGE_SHIFT) {
+		nr = (info->end - addr) >> PAGE_SHIFT;
+
+		/*
+		 * INVLPGB has a limit on the size of ranges it can
+		 * flush. Break up large flushes.
+		 */
+		nr = clamp_val(nr, 1, invlpgb_count_max);
+
+		invlpgb_flush_addr_nosync(addr, nr);
+	}
+	__tlbsync();
 }
 
 static void do_kernel_range_flush(void *info)
@@ -1087,6 +1111,22 @@ static void do_kernel_range_flush(void *info)
 		flush_tlb_one_kernel(addr);
 }
 
+static void kernel_tlb_flush_all(struct flush_tlb_info *info)
+{
+	if (cpu_feature_enabled(X86_FEATURE_INVLPGB))
+		invlpgb_flush_all();
+	else
+		on_each_cpu(do_flush_tlb_all, NULL, 1);
+}
+
+static void kernel_tlb_flush_range(struct flush_tlb_info *info)
+{
+	if (cpu_feature_enabled(X86_FEATURE_INVLPGB))
+		invlpgb_kernel_range_flush(info);
+	else
+		on_each_cpu(do_kernel_range_flush, info, 1);
+}
+
 void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 {
 	struct flush_tlb_info *info;
@@ -1097,9 +1137,9 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 				  TLB_GENERATION_INVALID);
 
 	if (info->end == TLB_FLUSH_ALL)
-		on_each_cpu(do_flush_tlb_all, NULL, 1);
+		kernel_tlb_flush_all(info);
 	else
-		on_each_cpu(do_kernel_range_flush, info, 1);
+		kernel_tlb_flush_range(info);
 
 	put_flush_tlb_info();
 }
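
The chunking in invlpgb_kernel_range_flush() is the one non-obvious
bit: a single INVLPGB can only name a limited number of pages
(invlpgb_count_max, which the series reads from CPUID at boot), so a
large range is cut into pieces of at most that many pages, each piece
is flushed with the no-sync variant, and one TLBSYNC at the end waits
for all of them at once. A standalone sketch of that arithmetic, with
stubbed flush primitives and an illustrative 8-page limit:

#include <stdio.h>

#define PAGE_SHIFT	12
#define clamp_val(v, lo, hi) \
	((v) < (lo) ? (lo) : ((v) > (hi) ? (hi) : (v)))

/* Illustrative; the real value comes from CPUID. */
static const unsigned long invlpgb_count_max = 8;

/* Stub: broadcast-invalidate nr pages at addr, do not wait. */
static void invlpgb_flush_addr_nosync(unsigned long addr, unsigned long nr)
{
	printf("INVLPGB: addr 0x%lx, %lu pages (no wait)\n", addr, nr);
}

/* Stub: wait for all broadcast flushes issued by this CPU. */
static void tlbsync(void)
{
	printf("TLBSYNC: all outstanding flushes are now done\n");
}

static void kernel_range_flush(unsigned long start, unsigned long end)
{
	unsigned long addr, nr;

	for (addr = start; addr < end; addr += nr << PAGE_SHIFT) {
		nr = (end - addr) >> PAGE_SHIFT;
		/* Break flushes above the hardware limit into pieces. */
		nr = clamp_val(nr, 1UL, invlpgb_count_max);
		invlpgb_flush_addr_nosync(addr, nr);
	}
	/* One wait covers every piece issued above. */
	tlbsync();
}

int main(void)
{
	unsigned long start = 0xffff888000000000UL;

	/* A 20-page flush becomes three INVLPGBs: 8 + 8 + 4 pages. */
	kernel_range_flush(start, start + (20UL << PAGE_SHIFT));
	return 0;
}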
--
2.43.0