[PATCH 05/15] mm: add CONFIG_ANON_VMA_LAZY and folio helpers

From: tao

Date: Wed May 27 2026 - 07:27:49 EST


Add the ANON_VMA_LAZY optimization foundation:
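- CONFIG_ANON_VMA_LAZY Kconfig option, gated by ARCH_SUPPORTS_ANON_VMA_LAZY
- FOLIO_MAPPING_ANON_VMA_LAZY flag for folio->mapping
- folio_test_anon_vma_lazy() helper; folio_test_anon() also accepts lazy folios
- anon_vma_lazy sysctl as a runtime switch for ANON_VMA_LAZY (off by default)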

This feature delays anon_vma allocation until fork, reducing memory
overhead for VMAs without children.

Signed-off-by: tao <tao.wangtao@xxxxxxxxx>
---
include/linux/page-flags.h | 23 +++++++++++
mm/Kconfig | 14 +++++++
mm/internal.h | 16 ++++++++
mm/mmap.c | 9 ++++
mm/rmap.c | 84 ++++++++++++++++++++++++++++++++++++++
5 files changed, 146 insertions(+)

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 0e03d816e8b9..c0cc43118877 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -696,6 +696,12 @@ PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted)
* the FOLIO_MAPPING_ANON_KSM bit may be set along with the FOLIO_MAPPING_ANON
* bit; and then folio->mapping points, not to an anon_vma, but to a private
* structure which KSM associates with that merged folio. See ksm.h.
+ *
+ * If CONFIG_ANON_VMA_LAZY is enabled, the FOLIO_MAPPING_ANON_KSM bit set on
+ * its own (without FOLIO_MAPPING_ANON) is reused as
+ * FOLIO_MAPPING_ANON_VMA_LAZY. In this case, folio->mapping points to the
+ * ANON_VMA_LAZY root VMA instead of an anon_vma, and folio_test_anon()
+ * tests both flag bits so that lazy folios are still treated as anonymous.
*
* Please note that, confusingly, "folio_mapping" refers to the inode
* address_space which maps the folio from disk; whereas "folio_mapped"
@@ -711,11 +717,16 @@ PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted)
#define FOLIO_MAPPING_ANON 0x1
#define FOLIO_MAPPING_ANON_KSM 0x2
#define FOLIO_MAPPING_KSM (FOLIO_MAPPING_ANON | FOLIO_MAPPING_ANON_KSM)
+#define FOLIO_MAPPING_ANON_VMA_LAZY FOLIO_MAPPING_ANON_KSM
#define FOLIO_MAPPING_FLAGS (FOLIO_MAPPING_ANON | FOLIO_MAPPING_ANON_KSM)

static __always_inline bool folio_test_anon(const struct folio *folio)
{
+#ifdef CONFIG_ANON_VMA_LAZY /* lazy folios lack FOLIO_MAPPING_ANON */
+ return ((unsigned long)folio->mapping & FOLIO_MAPPING_FLAGS) != 0;
+#else /* !CONFIG_ANON_VMA_LAZY */
return ((unsigned long)folio->mapping & FOLIO_MAPPING_ANON) != 0;
+#endif /* CONFIG_ANON_VMA_LAZY */
}

static __always_inline bool folio_test_lazyfree(const struct folio *folio)
@@ -734,6 +745,18 @@ static __always_inline bool PageAnon(const struct page *page)
{
return folio_test_anon(page_folio(page));
}
+
+/* True iff only the lazy bit is set; KSM folios (both bits) do not match. */
+static __always_inline bool folio_test_anon_vma_lazy(const struct folio *folio)
+{
+#ifdef CONFIG_ANON_VMA_LAZY
+ return ((unsigned long)folio->mapping & FOLIO_MAPPING_FLAGS) ==
+ FOLIO_MAPPING_ANON_VMA_LAZY;
+#else /* !CONFIG_ANON_VMA_LAZY: no folio is ever lazy */
+ return false;
+#endif /* CONFIG_ANON_VMA_LAZY */
+}
+
#ifdef CONFIG_KSM
/*
* A KSM page is one of those write-protected "shared pages" or "merged pages"
diff --git a/mm/Kconfig b/mm/Kconfig
index e8bf1e9e6ad9..c16b5d9b3ce9 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1412,6 +1412,20 @@ config LOCK_MM_AND_FIND_VMA
bool
depends on !STACK_GROWSUP

+config ARCH_SUPPORTS_ANON_VMA_LAZY
+ bool
+
+config ANON_VMA_LAZY
+ bool "Lazy allocation of anon_vma"
+ default y
+ depends on ARCH_SUPPORTS_ANON_VMA_LAZY && MMU
+ help
+ Defer anon_vma and anon_vma_chain allocation for anonymous VMAs
+ until fork(), reducing memory overhead for VMAs without children.
+
+ Say Y to build in this optimization. It stays off until enabled
+ at runtime through the anon_vma_lazy sysctl.
+
config IOMMU_MM_DATA
bool

diff --git a/mm/internal.h b/mm/internal.h
index 3dbbd118a78c..639f9c287f4c 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -248,6 +248,22 @@ static inline void anon_vma_unlock_read(struct anon_vma *anon_vma)

/* anon_vma_tree_t APIs */

+/* Encoded anon_vma tree type. Must fit within ANON_VMA_TREE_BITS. */
+#define ANON_VMA_TREE_REGULAR 0 /* regular anon_vma */
+#define ANON_VMA_TREE_VMA 1 /* lazy root: VMA that first faulted anon pages */
+#define ANON_VMA_TREE_PARENT 2 /* lazy fork child: refers to parent's anon_vma */
+#define ANON_VMA_TREE_INVALID 3 /* reserved */
+
+#define ANON_VMA_TREE_BITS 2 /* low pointer bits holding the type tag */
+#define ANON_VMA_TREE_MASK ((1UL << ANON_VMA_TREE_BITS) - 1)
+
+#ifdef CONFIG_ANON_VMA_LAZY
+extern bool anon_vma_lazy_enable; /* runtime switch, defined in mm/rmap.c */
+static inline bool anon_vma_lazy_enabled(void) { return anon_vma_lazy_enable; }
+#else /* !CONFIG_ANON_VMA_LAZY: feature compiled out */
+static inline bool anon_vma_lazy_enabled(void) { return false; }
+#endif /* CONFIG_ANON_VMA_LAZY */
+
static inline anon_vma_tree_t make_anon_vma_tree(struct anon_vma *anon_vma)
{
return (anon_vma_tree_t)anon_vma;
diff --git a/mm/mmap.c b/mm/mmap.c
index eac1fb3823eb..2ae733eb39f0 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1558,6 +1558,15 @@ static const struct ctl_table mmap_table[] = {
.extra2 = (void *)&mmap_rnd_compat_bits_max,
},
#endif
+#ifdef CONFIG_ANON_VMA_LAZY
+ {
+ .procname = "anon_vma_lazy",
+ .data = &anon_vma_lazy_enable,
+ .maxlen = sizeof(anon_vma_lazy_enable),
+ .mode = 0600,
+ .proc_handler = proc_dobool,
+ },
+#endif
};
#endif /* CONFIG_SYSCTL */

diff --git a/mm/rmap.c b/mm/rmap.c
index 5c4eb090c801..48c4463d8b2c 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -87,6 +87,90 @@
static struct kmem_cache *anon_vma_cachep;
static struct kmem_cache *anon_vma_chain_cachep;

+#ifdef CONFIG_ANON_VMA_LAZY
+/*
+ * ANON_VMA_LAZY: defer anon_vma allocation until fork().
+ *
+ * anon_vma and anon_vma_chain exist mainly to support reverse mapping
+ * across multiple processes. For VMAs that belong to a single process,
+ * eagerly creating anon_vma introduces unnecessary memory and setup
+ * overhead.
+ *
+ * This optimization delays anon_vma creation until fork(). Before that
+ * the VMA stays in a lazy state and no anon_vma or anon_vma_chain
+ * topology is created.
+ *
+ * vma->anon_vma encodes the anonymous VMA state. Low bits of the pointer
+ * distinguish lazy states:
+ *
+ * NULL
+ * VMA has no anonymous or CoW pages.
+ *
+ * regular anon_vma
+ * Standard anon_vma with anon_vma_chain topology.
+ *
+ * anon_vma_lazy_root | ANON_VMA_TREE_VMA
+ * Lazy root for the VMA that first faults anonymous pages.
+ * No anon_vma or anon_vma_chain topology exists.
+ *
+ * parent_anon_vma | ANON_VMA_TREE_PARENT
+ * Lazy state for VMAs created during fork(). The lazy parent_anon_vma
+ * refers to the anon_vma of the parent VMA.
+ *
+ * Anonymous folios extend folio->mapping with FOLIO_MAPPING_ANON_VMA_LAZY:
+ *
+ * anon_vma | FOLIO_MAPPING_ANON
+ * regular anonymous mapping
+ *
+ * anon_vma_lazy_root | FOLIO_MAPPING_ANON_VMA_LAZY
+ * lazy anonymous mapping
+ *
+ * In typical workloads most VMAs remain in ANON_VMA_TREE_VMA state.
+ * These VMAs have no anon_vma, no anon_vma_chain and only a single VMA.
+ * Reverse mapping can therefore be performed without anon_vma locking,
+ * providing a faster rmap path for the common case.
+ *
+ * During fork(), VMAs in ANON_VMA_TREE_VMA are upgraded to regular
+ * anon_vma in the parent to establish sharing topology. Child VMAs are
+ * created as ANON_VMA_TREE_PARENT and do not allocate anon_vma,
+ * avoiding additional fork overhead.
+ *
+ * Folio mapping rules:
+ *
+ * Lazy anonymous folios store the lazy root in folio->mapping using
+ * FOLIO_MAPPING_ANON_VMA_LAZY. This allows rmap walkers to resolve the
+ * owning VMA without requiring anon_vma topology.
+ *
+ * folio->mapping may be updated during fork() when lazy VMAs are
+ * upgraded to regular anon_vma. dup_anon_rmap() in copy_page_range()
+ * performs the upgrade and installs the new anon_vma mapping.
+ *
+ * folio_move_anon_rmap() updates folio->mapping when anonymous folios
+ * move between VMAs.
+ *
+ * As with regular anonymous memory, __folio_remove_rmap() does not
+ * clear folio->mapping. Rmap walkers validate mappings using
+ * folio_mapped().
+ *
+ * VMA split keeps vma->anon_vma unchanged. The lazy root holds an extra
+ * reference so folio->mapping remains valid without scanning folios.
+ *
+ * Internal helpers:
+ *
+ * anon_vma_tree_t
+ * The value encodes a reference to anon_vma topology. Low bits
+ * are used as type tags (ANON_VMA_TREE_*) to distinguish different
+ * anon_vma implementations (e.g. regular anon_vma or anon_vma_lazy).
+ *
+ * anon_rmap_t
+ * anon_rmap_t wraps the tagged pointer used by the rmap code and
+ * provides a type-safe interface for reverse mapping operations,
+ * covering both regular anon_vma and lazy anon_vma mappings.
+ */
+
+bool anon_vma_lazy_enable __read_mostly; /* anon_vma_lazy sysctl, default off */
+#endif
+
static inline struct anon_vma *anon_vma_alloc(void)
{
struct anon_vma *anon_vma;
--
2.17.1