Re: [PATCH v3 02/16] mm: introduce leaf entry type and use to simplify leaf entry logic

From: Zi Yan

Date: Mon Nov 10 2025 - 22:57:08 EST


On 10 Nov 2025, at 17:21, Lorenzo Stoakes wrote:

> The kernel maintains leaf page table entries which contain either:
>
> - Nothing ('none' entries)
> - Present entries (that is stuff the hardware can navigate without fault)
> - Everything else that will cause a fault which the kernel handles
>
> In the 'everything else' group we include swap entries, but we also include
> a number of other things such as migration entries, device private entries
> and marker entries.
>
> Unfortunately this 'everything else' group expresses everything through
> a swp_entry_t type, and these entries are referred to swap entries even
> though they may well not contain a... swap entry.
>
> This is compounded by the rather mind-boggling concept of a non-swap swap
> entry (checked via non_swap_entry()) and the means by which we twist and
> turn to satisfy this.
>
> This patch lays the foundation for reducing this confusion.
>
> We refer to 'everything else' as a 'software-define leaf entry' or
> 'softleaf'. for short And in fact we scoop up the 'none' entries into this
> concept also so we are left with:
>
> - Present entries.
> - Softleaf entries (which may be empty).
>
> This allows for radical simplification across the board - one can simply
> convert any leaf page table entry to a leaf entry via softleaf_from_pte().
>
> If the entry is present, we return an empty leaf entry, so it is assumed
> the caller is aware that they must differentiate between the two categories
> of page table entries, checking for the former via pte_present().
>
> As a result, we can eliminate a number of places where we would otherwise
> need to use predicates to see if we can proceed with leaf page table entry
> conversion and instead just go ahead and do it unconditionally.
>
> We do so where we can, adjusting surrounding logic as necessary to
> integrate the new softleaf_t logic as far as seems reasonable at this
> stage.
>
> We typedef swp_entry_t to softleaf_t for the time being until the
> conversion can be complete, meaning everything remains compatible
> regardless of which type is used. We will eventually remove swp_entry_t
> when the conversion is complete.
>
> We introduce a new header file to keep things clear - leafops.h - this
> imports swapops.h so can direct replace swapops imports without issue, and
> we do so in all the files that require it.
>
> Additionally, add new leafops.h file to core mm maintainers entry.
>
> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@xxxxxxxxxx>
> ---
> MAINTAINERS | 1 +
> fs/proc/task_mmu.c | 26 +--
> fs/userfaultfd.c | 6 +-
> include/linux/leafops.h | 387 ++++++++++++++++++++++++++++++++++
> include/linux/mm_inline.h | 6 +-
> include/linux/mm_types.h | 25 +++
> include/linux/swapops.h | 28 ---
> include/linux/userfaultfd_k.h | 51 +----
> mm/hmm.c | 2 +-
> mm/hugetlb.c | 37 ++--
> mm/madvise.c | 16 +-
> mm/memory.c | 41 ++--
> mm/mincore.c | 6 +-
> mm/mprotect.c | 6 +-
> mm/mremap.c | 4 +-
> mm/page_vma_mapped.c | 11 +-
> mm/shmem.c | 7 +-
> mm/userfaultfd.c | 6 +-
> 18 files changed, 502 insertions(+), 164 deletions(-)
> create mode 100644 include/linux/leafops.h
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 2628431dcdfe..314910a70bbf 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -16257,6 +16257,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
> F: include/linux/gfp.h
> F: include/linux/gfp_types.h
> F: include/linux/highmem.h
> +F: include/linux/leafops.h
> F: include/linux/memory.h
> F: include/linux/mm.h
> F: include/linux/mm_*.h
> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
> index fc35a0543f01..24d26b49d870 100644
> --- a/fs/proc/task_mmu.c
> +++ b/fs/proc/task_mmu.c
> @@ -14,7 +14,7 @@
> #include <linux/rmap.h>
> #include <linux/swap.h>
> #include <linux/sched/mm.h>
> -#include <linux/swapops.h>
> +#include <linux/leafops.h>
> #include <linux/mmu_notifier.h>
> #include <linux/page_idle.h>
> #include <linux/shmem_fs.h>
> @@ -1230,11 +1230,11 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
> if (pte_present(ptent)) {
> folio = page_folio(pte_page(ptent));
> present = true;
> - } else if (is_swap_pte(ptent)) {
> - swp_entry_t swpent = pte_to_swp_entry(ptent);
> + } else {
> + const softleaf_t entry = softleaf_from_pte(ptent);
>
> - if (is_pfn_swap_entry(swpent))
> - folio = pfn_swap_entry_folio(swpent);
> + if (softleaf_has_pfn(entry))
> + folio = softleaf_to_folio(entry);
> }
>
> if (folio) {

<snip>

>
> @@ -2330,18 +2330,18 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p,
> if (pte_soft_dirty(pte))
> categories |= PAGE_IS_SOFT_DIRTY;
> } else if (is_swap_pte(pte)) {

This should be just “else” like smaps_hugetlb_range()’s change, right?

> - swp_entry_t swp;
> + softleaf_t entry;
>
> categories |= PAGE_IS_SWAPPED;
> if (!pte_swp_uffd_wp_any(pte))
> categories |= PAGE_IS_WRITTEN;
>
> - swp = pte_to_swp_entry(pte);
> - if (is_guard_swp_entry(swp))
> + entry = softleaf_from_pte(pte);
> + if (softleaf_is_guard_marker(entry))
> categories |= PAGE_IS_GUARD;
> else if ((p->masks_of_interest & PAGE_IS_FILE) &&
> - is_pfn_swap_entry(swp) &&
> - !folio_test_anon(pfn_swap_entry_folio(swp)))
> + softleaf_has_pfn(entry) &&
> + !folio_test_anon(softleaf_to_folio(entry)))
> categories |= PAGE_IS_FILE;
>
> if (pte_swp_soft_dirty(pte))

<snip>

> diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
> index 137ce27ff68c..be20468fb5a9 100644
> --- a/mm/page_vma_mapped.c
> +++ b/mm/page_vma_mapped.c
> @@ -3,7 +3,7 @@
> #include <linux/rmap.h>
> #include <linux/hugetlb.h>
> #include <linux/swap.h>
> -#include <linux/swapops.h>
> +#include <linux/leafops.h>
>
> #include "internal.h"
>
> @@ -107,15 +107,12 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw, unsigned long pte_nr)
> pte_t ptent = ptep_get(pvmw->pte);
>
> if (pvmw->flags & PVMW_MIGRATION) {
> - swp_entry_t entry;
> - if (!is_swap_pte(ptent))
> - return false;
> - entry = pte_to_swp_entry(ptent);
> + const softleaf_t entry = softleaf_from_pte(ptent);

We do not need is_swap_pte() check here because softleaf_from_pte()
does the check. Just trying to reason the code with myself here.

>
> - if (!is_migration_entry(entry))
> + if (!softleaf_is_migration(entry))
> return false;
>
> - pfn = swp_offset_pfn(entry);
> + pfn = softleaf_to_pfn(entry);
> } else if (is_swap_pte(ptent)) {
> swp_entry_t entry;
>
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 6580f3cd24bb..395ca58ac4a5 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -66,7 +66,7 @@ static struct vfsmount *shm_mnt __ro_after_init;
> #include <linux/falloc.h>
> #include <linux/splice.h>
> #include <linux/security.h>
> -#include <linux/swapops.h>
> +#include <linux/leafops.h>
> #include <linux/mempolicy.h>
> #include <linux/namei.h>
> #include <linux/ctype.h>
> @@ -2286,7 +2286,8 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
> struct address_space *mapping = inode->i_mapping;
> struct mm_struct *fault_mm = vma ? vma->vm_mm : NULL;
> struct shmem_inode_info *info = SHMEM_I(inode);
> - swp_entry_t swap, index_entry;
> + swp_entry_t swap;
> + softleaf_t index_entry;
> struct swap_info_struct *si;
> struct folio *folio = NULL;
> bool skip_swapcache = false;
> @@ -2298,7 +2299,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
> swap = index_entry;
> *foliop = NULL;
>
> - if (is_poisoned_swp_entry(index_entry))
> + if (softleaf_is_poison_marker(index_entry))
> return -EIO;
>
> si = get_swap_device(index_entry);
> diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
> index cc4ce205bbec..055ec1050776 100644
> --- a/mm/userfaultfd.c
> +++ b/mm/userfaultfd.c
> @@ -10,7 +10,7 @@
> #include <linux/pagemap.h>
> #include <linux/rmap.h>
> #include <linux/swap.h>
> -#include <linux/swapops.h>
> +#include <linux/leafops.h>
> #include <linux/userfaultfd_k.h>
> #include <linux/mmu_notifier.h>
> #include <linux/hugetlb.h>
> @@ -208,7 +208,7 @@ int mfill_atomic_install_pte(pmd_t *dst_pmd,
> * MISSING|WP registered, we firstly wr-protect a none pte which has no
> * page cache page backing it, then access the page.
> */
> - if (!pte_none(dst_ptep) && !is_uffd_pte_marker(dst_ptep))
> + if (!pte_none(dst_ptep) && !pte_is_uffd_marker(dst_ptep))
> goto out_unlock;
>
> if (page_in_cache) {
> @@ -590,7 +590,7 @@ static __always_inline ssize_t mfill_atomic_hugetlb(
> if (!uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE)) {
> const pte_t ptep = huge_ptep_get(dst_mm, dst_addr, dst_pte);
>
> - if (!huge_pte_none(ptep) && !is_uffd_pte_marker(ptep)) {
> + if (!huge_pte_none(ptep) && !pte_is_uffd_marker(ptep)) {
> err = -EEXIST;
> hugetlb_vma_unlock_read(dst_vma);
> mutex_unlock(&hugetlb_fault_mutex_table[hash]);

The rest of the code looks good to me. I will check it again once
you fix the commit log and comments. Thank you for working on this.

Best Regards,
Yan, Zi