Re: [PATCH v3 0/2] mm/mprotect: micro-optimization work

From: Andrew Morton

Date: Thu Apr 02 2026 - 14:33:34 EST


On Thu, 2 Apr 2026 15:16:26 +0100 Pedro Falcato <pfalcato@xxxxxxx> wrote:

> Micro-optimize the change_protection functionality and the
> change_pte_range() routine. This set of functions works in an incredibly
> tight loop, and even small inefficiencies are incredibly evident when spun
> hundreds, thousands or hundreds of thousands of times.

Thanks, I updated mm.git's mm-unstable branch to this version.

The update is rather large. If people think it best to slip this work
into the next -rc1 then please advise.

>
> v3:
> - Collapse a few lines into a single line in patch 1 (David)
> - Bring the inlining to a higher level (David)
> - Pick up David's patch 1 ACK (thank you!)
> - Pick up Luke Yang's Tested-by (thank you!)
> - Add Luke's results and akpmify the Links: a bit (cover letter)

Here's how v3 altered mm.git:

mm/mprotect.c | 98 +++++++++++++++++++++++++++---------------------
1 file changed, 56 insertions(+), 42 deletions(-)

--- a/mm/mprotect.c~b
+++ a/mm/mprotect.c
@@ -103,7 +103,7 @@ bool can_change_pte_writable(struct vm_a
return can_change_shared_pte_writable(vma, pte);
}

-static __always_inline int mprotect_folio_pte_batch(struct folio *folio, pte_t *ptep,
+static int mprotect_folio_pte_batch(struct folio *folio, pte_t *ptep,
pte_t pte, int max_nr_ptes, fpb_t flags)
{
/* No underlying folio, so cannot batch */
@@ -143,7 +143,7 @@ static __always_inline void prot_commit_
* !PageAnonExclusive() pages, starting from start_idx. Caller must enforce
* that the ptes point to consecutive pages of the same anon large folio.
*/
-static int page_anon_exclusive_sub_batch(int start_idx, int max_len,
+static __always_inline int page_anon_exclusive_sub_batch(int start_idx, int max_len,
struct page *first_page, bool expected_anon_exclusive)
{
int idx;
@@ -177,13 +177,6 @@ static __always_inline void commit_anon_
int sub_batch_idx = 0;
int len;

- /* Optimize for the common order-0 case. */
- if (likely(nr_ptes == 1)) {
- prot_commit_flush_ptes(vma, addr, ptep, oldpte, ptent, 1,
- 0, PageAnonExclusive(first_page), tlb);
- return;
- }
-
while (nr_ptes) {
expected_anon_exclusive = PageAnonExclusive(first_page + sub_batch_idx);
len = page_anon_exclusive_sub_batch(sub_batch_idx, nr_ptes,
@@ -195,7 +188,7 @@ static __always_inline void commit_anon_
}
}

-static void set_write_prot_commit_flush_ptes(struct vm_area_struct *vma,
+static __always_inline void set_write_prot_commit_flush_ptes(struct vm_area_struct *vma,
struct folio *folio, struct page *page, unsigned long addr, pte_t *ptep,
pte_t oldpte, pte_t ptent, int nr_ptes, struct mmu_gather *tlb)
{
@@ -234,8 +227,7 @@ static long change_softleaf_pte(struct v
* just be safe and disable write
*/
if (folio_test_anon(folio))
- entry = make_readable_exclusive_migration_entry(
- swp_offset(entry));
+ entry = make_readable_exclusive_migration_entry(swp_offset(entry));
else
entry = make_readable_migration_entry(swp_offset(entry));
newpte = swp_entry_to_pte(entry);
@@ -246,8 +238,7 @@ static long change_softleaf_pte(struct v
* We do not preserve soft-dirtiness. See
* copy_nonpresent_pte() for explanation.
*/
- entry = make_readable_device_private_entry(
- swp_offset(entry));
+ entry = make_readable_device_private_entry(swp_offset(entry));
newpte = swp_entry_to_pte(entry);
if (pte_swp_uffd_wp(oldpte))
newpte = pte_swp_mkuffd_wp(newpte);
@@ -286,6 +277,45 @@ static long change_softleaf_pte(struct v
return 0;
}

+static __always_inline void change_present_ptes(struct mmu_gather *tlb,
+ struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
+ int nr_ptes, unsigned long end, pgprot_t newprot,
+ struct folio *folio, struct page *page, unsigned long cp_flags)
+{
+ const bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
+ const bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
+ pte_t ptent, oldpte;
+
+ oldpte = modify_prot_start_ptes(vma, addr, ptep, nr_ptes);
+ ptent = pte_modify(oldpte, newprot);
+
+ if (uffd_wp)
+ ptent = pte_mkuffd_wp(ptent);
+ else if (uffd_wp_resolve)
+ ptent = pte_clear_uffd_wp(ptent);
+
+ /*
+ * In some writable, shared mappings, we might want
+ * to catch actual write access -- see
+ * vma_wants_writenotify().
+ *
+ * In all writable, private mappings, we have to
+ * properly handle COW.
+ *
+ * In both cases, we can sometimes still change PTEs
+ * writable and avoid the write-fault handler, for
+ * example, if a PTE is already dirty and no other
+ * COW or special handling is required.
+ */
+ if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) &&
+ !pte_write(ptent))
+ set_write_prot_commit_flush_ptes(vma, folio, page,
+ addr, ptep, oldpte, ptent, nr_ptes, tlb);
+ else
+ prot_commit_flush_ptes(vma, addr, ptep, oldpte, ptent,
+ nr_ptes, /* idx = */ 0, /* set_write = */ false, tlb);
+}
+
static long change_pte_range(struct mmu_gather *tlb,
struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr,
unsigned long end, pgprot_t newprot, unsigned long cp_flags)
@@ -296,7 +326,6 @@ static long change_pte_range(struct mmu_
bool is_private_single_threaded;
bool prot_numa = cp_flags & MM_CP_PROT_NUMA;
bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
- bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
int nr_ptes;

tlb_change_page_size(tlb, PAGE_SIZE);
@@ -317,7 +346,6 @@ static long change_pte_range(struct mmu_
int max_nr_ptes = (end - addr) >> PAGE_SHIFT;
struct folio *folio = NULL;
struct page *page;
- pte_t ptent;

/* Already in the desired state. */
if (prot_numa && pte_protnone(oldpte))
@@ -343,34 +371,20 @@ static long change_pte_range(struct mmu_

nr_ptes = mprotect_folio_pte_batch(folio, pte, oldpte, max_nr_ptes, flags);

- oldpte = modify_prot_start_ptes(vma, addr, pte, nr_ptes);
- ptent = pte_modify(oldpte, newprot);
-
- if (uffd_wp)
- ptent = pte_mkuffd_wp(ptent);
- else if (uffd_wp_resolve)
- ptent = pte_clear_uffd_wp(ptent);
-
/*
- * In some writable, shared mappings, we might want
- * to catch actual write access -- see
- * vma_wants_writenotify().
- *
- * In all writable, private mappings, we have to
- * properly handle COW.
- *
- * In both cases, we can sometimes still change PTEs
- * writable and avoid the write-fault handler, for
- * example, if a PTE is already dirty and no other
- * COW or special handling is required.
+ * Optimize for the small-folio common case by
+ * special-casing it here. Compiler constant propagation
+ * plus copious amounts of __always_inline does wonders.
*/
- if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) &&
- !pte_write(ptent))
- set_write_prot_commit_flush_ptes(vma, folio, page,
- addr, pte, oldpte, ptent, nr_ptes, tlb);
- else
- prot_commit_flush_ptes(vma, addr, pte, oldpte, ptent,
- nr_ptes, /* idx = */ 0, /* set_write = */ false, tlb);
+ if (likely(nr_ptes == 1)) {
+ change_present_ptes(tlb, vma, addr, pte, 1,
+ end, newprot, folio, page, cp_flags);
+ } else {
+ change_present_ptes(tlb, vma, addr, pte,
+ nr_ptes, end, newprot, folio, page,
+ cp_flags);
+ }
+
pages += nr_ptes;
} else if (pte_none(oldpte)) {
/*
_