[PATCH 06/12] x86/mm: use INVLPGB for kernel TLB flushes

From: Rik van Riel
Date: Mon Dec 30 2024 - 13:05:56 EST


Use broadcast TLB invalidation (INVLPGB) for kernel addresses when the CPU supports it.

This avoids having to send IPIs to the other CPUs for kernel TLB flushes.

Signed-off-by: Rik van Riel <riel@xxxxxxxxxxx>
---
arch/x86/mm/tlb.c | 31 +++++++++++++++++++++++++++++++
1 file changed, 31 insertions(+)

diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 6cf881a942bb..29207dc5b807 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1077,6 +1077,32 @@ void flush_tlb_all(void)
on_each_cpu(do_flush_tlb_all, NULL, 1);
}

+/* Flush kernel TLB range [start, end) via INVLPGB and wait with TLBSYNC. */
+static void broadcast_kernel_range_flush(unsigned long start, unsigned long end)
+{
+	unsigned long addr, nr;
+	unsigned long maxnr = invlpgb_count_max;
+	unsigned long threshold = tlb_single_page_flush_ceiling * maxnr;
+
+	/*
+	 * TLBSYNC only waits for flushes originating on the same CPU.
+	 * Disabling preemption prevents migration, so we wait on all flushes.
+	 */
+	guard(preempt)();
+
+	if (end == TLB_FLUSH_ALL || (end - start) > threshold << PAGE_SHIFT) {
+		invlpgb_flush_all();
+	} else {
+		for (addr = start; addr < end; addr += nr << PAGE_SHIFT) {
+			/* Never 0: a sub-page tail must still advance addr. */
+			nr = max(min((end - addr) >> PAGE_SHIFT, maxnr), 1UL);
+			invlpgb_flush_addr(addr, nr);
+		}
+	}
+
+	tlbsync();
+}
+
static void do_kernel_range_flush(void *info)
{
struct flush_tlb_info *f = info;
@@ -1089,6 +1115,11 @@ static void do_kernel_range_flush(void *info)

void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) {
+ broadcast_kernel_range_flush(start, end);
+ return;
+ }
+
/* Balance as user space task's flush, a bit conservative */
if (end == TLB_FLUSH_ALL ||
(end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) {
--
2.47.1