[RFC, PATCH 05/12] mm: intercept protnone faults on VM_UFFD_MINOR anonymous VMAs

From: Kiryl Shutsemau (Meta)

Date: Tue Apr 14 2026 - 10:31:24 EST


When a protnone PTE/PMD fault occurs on a VMA with VM_UFFD_MINOR,
dispatch to the userfaultfd minor fault path instead of NUMA balancing.
In async mode, restore the original page protection inline (clearing
protnone); in sync mode, deliver the fault to userspace via
handle_userfault().

Feed NUMA locality stats from the fault path via task_numa_fault()
so the scheduler retains placement data even though NUMA scanning
is skipped on these VMAs.
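
For the sync case, userspace sees these faults through the normal
userfaultfd message path with the minor-fault flag set. A minimal
sketch of the handler side, assuming the anonymous range has already
been registered with UFFDIO_REGISTER_MODE_MINOR; the resolution step
for anonymous VMAs is defined elsewhere in this series:

	/* Userspace, sync mode: needs <linux/userfaultfd.h>, <unistd.h> */
	static void handle_one_minor_fault(int uffd)
	{
		struct uffd_msg msg;

		/* Blocks until the kernel reports a fault on a registered range */
		if (read(uffd, &msg, sizeof(msg)) != sizeof(msg))
			return;

		if (msg.event != UFFD_EVENT_PAGEFAULT ||
		    !(msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR))
			return;

		/*
		 * msg.arg.pagefault.address is the faulting address. Apply the
		 * placement policy here (e.g. migrate the folio), then resolve
		 * the fault and wake the faulting thread; the exact ioctl for
		 * anonymous VMAs depends on the rest of this series.
		 */
	}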

Signed-off-by: Kiryl Shutsemau (Meta) <kas@xxxxxxxxxx>
Assisted-by: Claude:claude-opus-4-6
---
include/linux/huge_mm.h | 6 +++++
mm/huge_memory.c | 24 +++++++++++++++++++
mm/memory.c | 51 +++++++++++++++++++++++++++++++++++++++--
3 files changed, 79 insertions(+), 2 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index a4d9f964dfde..a900bb530998 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -519,6 +519,7 @@ static inline bool folio_test_pmd_mappable(struct folio *folio)
}

vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf);
+vm_fault_t do_huge_pmd_uffd_minor(struct vm_fault *vmf);

vm_fault_t do_huge_pmd_device_private(struct vm_fault *vmf);

@@ -707,6 +708,11 @@ static inline vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
return 0;
}

+static inline vm_fault_t do_huge_pmd_uffd_minor(struct vm_fault *vmf)
+{
+ return 0;
+}
+
static inline vm_fault_t do_huge_pmd_device_private(struct vm_fault *vmf)
{
return 0;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2ad736ff007c..264c646a8573 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2181,6 +2181,30 @@ static inline bool can_change_pmd_writable(struct vm_area_struct *vma,
return pmd_dirty(pmd);
}

+vm_fault_t do_huge_pmd_uffd_minor(struct vm_fault *vmf)
+{
+ struct vm_area_struct *vma = vmf->vma;
+
+ if (userfaultfd_minor_async(vma)) {
+ pmd_t pmd;
+
+ vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+ if (unlikely(!pmd_same(pmdp_get(vmf->pmd), vmf->orig_pmd))) {
+ spin_unlock(vmf->ptl);
+ return 0;
+ }
+ pmd = pmd_modify(vmf->orig_pmd, vma->vm_page_prot);
+ pmd = pmd_mkyoung(pmd);
+ set_pmd_at(vma->vm_mm, vmf->address & HPAGE_PMD_MASK,
+ vmf->pmd, pmd);
+ update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
+ spin_unlock(vmf->ptl);
+ return 0;
+ }
+
+ return handle_userfault(vmf, VM_UFFD_MINOR);
+}
+
/* NUMA hinting page fault entry point for trans huge pmds */
vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
{
diff --git a/mm/memory.c b/mm/memory.c
index c65e82c86fed..f068ff4027e8 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -6045,6 +6045,47 @@ static void numa_rebuild_large_mapping(struct vm_fault *vmf, struct vm_area_stru
}
}

+static void uffd_minor_feed_numa_fault(struct vm_fault *vmf)
+{
+ struct folio *folio;
+
+ folio = vm_normal_folio(vmf->vma, vmf->address, vmf->orig_pte);
+ if (folio) {
+ int nid = folio_nid(folio);
+ int flags = 0;
+
+ if (nid == numa_node_id())
+ flags |= TNF_FAULT_LOCAL;
+ task_numa_fault(folio_last_cpupid(folio), nid, 1, flags);
+ }
+}
+
+static vm_fault_t do_uffd_minor_anon(struct vm_fault *vmf)
+{
+ /* Feed NUMA stats even though we skip NUMA scanning on this VMA */
+ uffd_minor_feed_numa_fault(vmf);
+
+ if (userfaultfd_minor_async(vmf->vma)) {
+ pte_t pte;
+
+ spin_lock(vmf->ptl);
+ if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) {
+ pte_unmap_unlock(vmf->pte, vmf->ptl);
+ return 0;
+ }
+ pte = pte_modify(vmf->orig_pte, vmf->vma->vm_page_prot);
+ pte = pte_mkyoung(pte);
+ set_pte_at(vmf->vma->vm_mm, vmf->address, vmf->pte, pte);
+ update_mmu_cache(vmf->vma, vmf->address, vmf->pte);
+ pte_unmap_unlock(vmf->pte, vmf->ptl);
+ return 0;
+ }
+
+ /* Sync mode: unmap PTE and deliver to userfaultfd handler */
+ pte_unmap(vmf->pte);
+ return handle_userfault(vmf, VM_UFFD_MINOR);
+}
+
static vm_fault_t do_numa_page(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
@@ -6319,8 +6360,11 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
if (!pte_present(vmf->orig_pte))
return do_swap_page(vmf);

- if (pte_protnone(vmf->orig_pte) && vma_is_accessible(vmf->vma))
+ if (pte_protnone(vmf->orig_pte) && vma_is_accessible(vmf->vma)) {
+ if (userfaultfd_minor(vmf->vma))
+ return do_uffd_minor_anon(vmf);
return do_numa_page(vmf);
+ }

spin_lock(vmf->ptl);
entry = vmf->orig_pte;
@@ -6434,8 +6478,11 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
return 0;
}
if (pmd_trans_huge(vmf.orig_pmd)) {
- if (pmd_protnone(vmf.orig_pmd) && vma_is_accessible(vma))
+ if (pmd_protnone(vmf.orig_pmd) && vma_is_accessible(vma)) {
+ if (userfaultfd_minor(vma))
+ return do_huge_pmd_uffd_minor(&vmf);
return do_huge_pmd_numa_page(&vmf);
+ }

if ((flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) &&
!pmd_write(vmf.orig_pmd)) {
--
2.51.2