[PATCH] mm: reduce tlb flush range when changing vma protection

From: Bibo Mao
Date: Tue Mar 08 2022 - 21:57:36 EST


The NUMA balancing worker periodically changes VMA protection to
PROT_NONE. By default it scans 256M of VMA memory, stepping one PMD
at a time. Even when only a few pages actually have their protection
changed to PROT_NONE, the TLB flush still covers a PMD-sized range.
This patch tracks the address range of the pages whose PTE protection
actually changed and flushes only that range, which reduces the size
of the TLB flush.

Signed-off-by: Bibo Mao <maobibo@xxxxxxxxxxx>
---
mm/mprotect.c | 39 +++++++++++++++++++++++++++++----------
1 file changed, 29 insertions(+), 10 deletions(-)

diff --git a/mm/mprotect.c b/mm/mprotect.c
index 2887644fd150..a9f51a998dc8 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -35,9 +35,23 @@

#include "internal.h"

+typedef struct {
+ unsigned long start;
+ unsigned long end;
+} tlb_range;
+
+static inline void add_tlb_range(tlb_range *range, unsigned long start,
+ unsigned long end)
+{
+ if (start < range->start)
+ range->start = start;
+ if (end > range->end)
+ range->end = end;
+}
+
static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, unsigned long end, pgprot_t newprot,
- unsigned long cp_flags)
+ unsigned long cp_flags, tlb_range *range)
{
pte_t *pte, oldpte;
spinlock_t *ptl;
@@ -138,6 +152,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
ptent = pte_mkwrite(ptent);
}
ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent);
+ add_tlb_range(range, addr, addr + PAGE_SIZE);
pages++;
} else if (is_swap_pte(oldpte)) {
swp_entry_t entry = pte_to_swp_entry(oldpte);
@@ -184,6 +199,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,

if (!pte_same(oldpte, newpte)) {
set_pte_at(vma->vm_mm, addr, pte, newpte);
+ add_tlb_range(range, addr, addr + PAGE_SIZE);
pages++;
}
}
@@ -221,7 +237,7 @@ static inline int pmd_none_or_clear_bad_unless_trans_huge(pmd_t *pmd)

static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
pud_t *pud, unsigned long addr, unsigned long end,
- pgprot_t newprot, unsigned long cp_flags)
+ pgprot_t newprot, unsigned long cp_flags, tlb_range *tlb)
{
pmd_t *pmd;
unsigned long next;
@@ -267,6 +283,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
if (nr_ptes) {
if (nr_ptes == HPAGE_PMD_NR) {
pages += HPAGE_PMD_NR;
+ add_tlb_range(tlb, addr, next);
nr_huge_updates++;
}

@@ -277,7 +294,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
/* fall through, the trans huge pmd just split */
}
this_pages = change_pte_range(vma, pmd, addr, next, newprot,
- cp_flags);
+ cp_flags, tlb);
pages += this_pages;
next:
cond_resched();
@@ -293,7 +310,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,

static inline unsigned long change_pud_range(struct vm_area_struct *vma,
p4d_t *p4d, unsigned long addr, unsigned long end,
- pgprot_t newprot, unsigned long cp_flags)
+ pgprot_t newprot, unsigned long cp_flags, tlb_range *range)
{
pud_t *pud;
unsigned long next;
@@ -305,7 +322,7 @@ static inline unsigned long change_pud_range(struct vm_area_struct *vma,
if (pud_none_or_clear_bad(pud))
continue;
pages += change_pmd_range(vma, pud, addr, next, newprot,
- cp_flags);
+ cp_flags, range);
} while (pud++, addr = next, addr != end);

return pages;
@@ -313,7 +330,7 @@ static inline unsigned long change_pud_range(struct vm_area_struct *vma,

static inline unsigned long change_p4d_range(struct vm_area_struct *vma,
pgd_t *pgd, unsigned long addr, unsigned long end,
- pgprot_t newprot, unsigned long cp_flags)
+ pgprot_t newprot, unsigned long cp_flags, tlb_range *range)
{
p4d_t *p4d;
unsigned long next;
@@ -325,7 +342,7 @@ static inline unsigned long change_p4d_range(struct vm_area_struct *vma,
if (p4d_none_or_clear_bad(p4d))
continue;
pages += change_pud_range(vma, p4d, addr, next, newprot,
- cp_flags);
+ cp_flags, range);
} while (p4d++, addr = next, addr != end);

return pages;
@@ -338,24 +355,26 @@ static unsigned long change_protection_range(struct vm_area_struct *vma,
struct mm_struct *mm = vma->vm_mm;
pgd_t *pgd;
unsigned long next;
- unsigned long start = addr;
unsigned long pages = 0;
+ tlb_range range;

BUG_ON(addr >= end);
pgd = pgd_offset(mm, addr);
flush_cache_range(vma, addr, end);
+ range.start = end;
+ range.end = addr;
inc_tlb_flush_pending(mm);
do {
next = pgd_addr_end(addr, end);
if (pgd_none_or_clear_bad(pgd))
continue;
pages += change_p4d_range(vma, pgd, addr, next, newprot,
- cp_flags);
+ cp_flags, &range);
} while (pgd++, addr = next, addr != end);

/* Only flush the TLB if we actually modified any entries: */
if (pages)
- flush_tlb_range(vma, start, end);
+ flush_tlb_range(vma, range.start, range.end);
dec_tlb_flush_pending(mm);

return pages;
--
2.31.1