[PATCH] mm/memory: refactor finish_fault

From: Sarthak Sharma

Date: Wed Jun 24 2026 - 06:21:12 EST


finish_fault() currently has a goto fallback implementation
where we try to map a large folio with PTEs. If that cannot be
installed, we goto fallback and go through the fallback mapping
path again. This looks weird and is tough to comprehend.

Remove the goto fallback implementation and try to map the
whole folio if allowed. If the whole folio cannot be mapped,
fall back to single page mapping without repeating the whole
function.

The cleanup of finish_fault() was suggested by David in [1].

[1] https://lore.kernel.org/all/3684c55a-6581-4731-b94a-19526f455a1e@xxxxxxxxxx/

Suggested-by: David Hildenbrand (Arm) <david@xxxxxxxxxx>
Signed-off-by: Sarthak Sharma <sarthak.sharma@xxxxxxx>
---
Tested this patch by running mm selftests on baseline and patched 7.1
kernels. No regressions were observed.

mm/memory.c | 78 ++++++++++++++++++++++++++++-------------------------
1 file changed, 41 insertions(+), 37 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 86a973119bd4..f883b3f3850e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5666,18 +5666,16 @@ static bool vmf_pte_changed(struct vm_fault *vmf)
vm_fault_t finish_fault(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
- struct page *page;
+ struct page *page, *map_page;
struct folio *folio;
vm_fault_t ret;
bool is_cow = (vmf->flags & FAULT_FLAG_WRITE) &&
!(vma->vm_flags & VM_SHARED);
- int type, nr_pages;
- unsigned long addr;
+ pte_t *fault_pte, *start_pte;
+ int type, nr_pages, nr_ptes;
+ unsigned long start_addr;
bool needs_fallback = false;

-fallback:
- addr = vmf->address;
-
/* Did we COW the page? */
if (is_cow)
page = vmf->cow_page;
@@ -5695,7 +5693,7 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
return ret;
}

- if (!needs_fallback && vma->vm_file) {
+ if (vma->vm_file) {
struct address_space *mapping = vma->vm_file->f_mapping;
pgoff_t file_end;

@@ -5727,56 +5725,62 @@ vm_fault_t finish_fault(struct vm_fault *vmf)

nr_pages = folio_nr_pages(folio);

+ /* Initialize for single page mapping */
+ map_page = page;
+ start_addr = vmf->address;
+ nr_ptes = 1;
+
/* Using per-page fault to maintain the uffd semantics */
- if (unlikely(userfaultfd_armed(vma)) || unlikely(needs_fallback)) {
- nr_pages = 1;
- } else if (nr_pages > 1) {
- pgoff_t idx = folio_page_idx(folio, page);
- /* The page offset of vmf->address within the VMA. */
- pgoff_t vma_off = vmf->pgoff - vmf->vma->vm_pgoff;
+ if (nr_pages > 1 && likely(!needs_fallback) && likely(!userfaultfd_armed(vma))) {
+ unsigned long fault_page_idx = folio_page_idx(folio, page);
+ unsigned long pages_to_folio_end = nr_pages - fault_page_idx;
+
+ bool fits_vma = folio_within_vma(folio, vma);
+
/* The index of the entry in the pagetable for fault page. */
pgoff_t pte_off = pte_index(vmf->address);

- /*
- * Fallback to per-page fault in case the folio size in page
- * cache beyond the VMA limits and PMD pagetable limits.
- */
- if (unlikely(vma_off < idx ||
- vma_off + (nr_pages - idx) > vma_pages(vma) ||
- pte_off < idx ||
- pte_off + (nr_pages - idx) > PTRS_PER_PTE)) {
- nr_pages = 1;
- } else {
- /* Now we can set mappings for the whole large folio. */
- addr = vmf->address - idx * PAGE_SIZE;
- page = &folio->page;
+ bool fits_pte_table = pte_off >= fault_page_idx &&
+ pte_off + pages_to_folio_end <= PTRS_PER_PTE;
+
+ if (likely(fits_vma) && likely(fits_pte_table)) {
+ map_page = &folio->page;
+ start_addr = vmf->address - fault_page_idx * PAGE_SIZE;
+ nr_ptes = nr_pages;
}
}

vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
- addr, &vmf->ptl);
+ start_addr, &vmf->ptl);
if (!vmf->pte)
return VM_FAULT_NOPAGE;

+ start_pte = vmf->pte;
+ fault_pte = vmf->pte + ((vmf->address - start_addr) >> PAGE_SHIFT);
+
+ if (nr_ptes > 1 && !pte_range_none(vmf->pte, nr_ptes)) {
+ /* Single page mapping fallback */
+ map_page = page;
+ start_addr = vmf->address;
+ nr_ptes = 1;
+ vmf->pte = fault_pte;
+ }
+
/* Re-check under ptl */
- if (nr_pages == 1 && unlikely(vmf_pte_changed(vmf))) {
- update_mmu_tlb(vma, addr, vmf->pte);
+ if (nr_ptes == 1 && unlikely(vmf_pte_changed(vmf))) {
+ update_mmu_tlb(vma, vmf->address, fault_pte);
ret = VM_FAULT_NOPAGE;
goto unlock;
- } else if (nr_pages > 1 && !pte_range_none(vmf->pte, nr_pages)) {
- needs_fallback = true;
- pte_unmap_unlock(vmf->pte, vmf->ptl);
- goto fallback;
}

- folio_ref_add(folio, nr_pages - 1);
- set_pte_range(vmf, folio, page, nr_pages, addr);
+ folio_ref_add(folio, nr_ptes - 1);
+ set_pte_range(vmf, folio, map_page, nr_ptes, start_addr);
type = is_cow ? MM_ANONPAGES : mm_counter_file(folio);
- add_mm_counter(vma->vm_mm, type, nr_pages);
+ add_mm_counter(vma->vm_mm, type, nr_ptes);
ret = 0;

unlock:
- pte_unmap_unlock(vmf->pte, vmf->ptl);
+ pte_unmap_unlock(start_pte, vmf->ptl);
return ret;
}

--
2.39.5