[PATCH v2 30/69] mm/hugetlb: Switch HugeTLB to section-based vmemmap optimization

From: Muchun Song

Date: Wed May 13 2026 - 10:14:13 EST


HugeTLB bootmem vmemmap optimization still carries its own early setup
path, including pre-populating optimized mappings before the generic
sparse-vmemmap code runs.

Now that section metadata records the compound page order, HugeTLB only
needs to mark the bootmem huge page range with that order. The generic
sparse-vmemmap population path can then allocate and map the shared tail
vmemmap pages without any HugeTLB-specific early population code.

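Do that by setting the section order when a bootmem huge page is
allocated and dropping the dedicated pre-HVO helpers
(hugetlb_vmemmap_init_early(), vmemmap_populate_hvo() and their
support code) and related special-casing. The order is only recorded
when both the start PFN and the number of pages are section-aligned;
any other range is left untouched and the page is not flagged
HUGE_BOOTMEM_HVO. For pre-HVO pages, gather_bootmem_prealloc_node()
resets the section order to 0 after marking the folio as
vmemmap-optimized.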

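This removes duplicate early setup logic and switches HugeTLB to the
section-based vmemmap optimization path. Along with that,
register_page_bootmem_info_section() now registers the section memmap
unconditionally instead of skipping preinited vmemmap sections, and
__hugetlb_vmemmap_optimize_folios() no longer calls
register_page_bootmem_memmap() after write-protecting the vmemmap.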

Signed-off-by: Muchun Song <songmuchun@xxxxxxxxxxxxx>
---
include/linux/hugetlb.h | 1 -
include/linux/mm.h | 3 -
include/linux/mmzone.h | 17 ++++++
mm/bootmem_info.c | 5 +-
mm/hugetlb.c | 26 ++-------
mm/hugetlb_vmemmap.c | 124 ++++++----------------------------------
mm/hugetlb_vmemmap.h | 13 ++---
mm/sparse-vmemmap.c | 29 ----------
8 files changed, 45 insertions(+), 173 deletions(-)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index fd901bb3630c..dce8969961ea 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -171,7 +171,6 @@ struct address_space *hugetlb_folio_mapping_lock_write(struct folio *folio);

extern int movable_gigantic_pages __read_mostly;
extern int sysctl_hugetlb_shm_group __read_mostly;
-extern struct list_head huge_boot_pages[MAX_NUMNODES];

void hugetlb_struct_page_init(void);
void hugetlb_bootmem_alloc(void);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 31e27ff6a35f..f39f6fca6551 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4864,9 +4864,6 @@ int vmemmap_populate_hugepages(unsigned long start, unsigned long end,
int node, struct vmem_altmap *altmap);
int vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap);
-int vmemmap_populate_hvo(unsigned long start, unsigned long end,
- unsigned int order, struct zone *zone,
- unsigned long headsize);
void vmemmap_wrprotect_hvo(unsigned long start, unsigned long end, int node,
unsigned long headsize);
void vmemmap_populate_print_last(void);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index bf4c40818b63..d6a5dd042c25 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -2264,6 +2264,18 @@ static inline unsigned int section_order(const struct mem_section *section)
}
#endif

+static inline void section_set_order_range(unsigned long pfn, unsigned long nr_pages,
+ unsigned int order)
+{
+ unsigned long section_nr = pfn_to_section_nr(pfn);
+
+ if (!IS_ALIGNED(pfn | nr_pages, PAGES_PER_SECTION))
+ return;
+
+ for (unsigned long i = 0; i < nr_pages / PAGES_PER_SECTION; i++)
+ section_set_order(__nr_to_section(section_nr + i), order);
+}
+
static inline unsigned int pfn_to_section_order(unsigned long pfn)
{
return section_order(__pfn_to_section(pfn));
@@ -2417,6 +2429,11 @@ static inline unsigned long next_present_section_nr(unsigned long section_nr)
#else
#define sparse_vmemmap_init_nid_early(_nid) do {} while (0)
#define pfn_in_present_section pfn_valid
+static inline void section_set_order_range(unsigned long pfn, unsigned long nr_pages,
+ unsigned int order)
+{
+}
+
static inline unsigned int pfn_to_section_order(unsigned long pfn)
{
return 0;
diff --git a/mm/bootmem_info.c b/mm/bootmem_info.c
index 3d7675a3ae04..24f45d86ffb3 100644
--- a/mm/bootmem_info.c
+++ b/mm/bootmem_info.c
@@ -51,9 +51,8 @@ static void __init register_page_bootmem_info_section(unsigned long start_pfn)
section_nr = pfn_to_section_nr(start_pfn);
ms = __nr_to_section(section_nr);

- if (!preinited_vmemmap_section(ms))
- register_page_bootmem_memmap(section_nr, pfn_to_page(start_pfn),
- PAGES_PER_SECTION);
+ register_page_bootmem_memmap(section_nr, pfn_to_page(start_pfn),
+ PAGES_PER_SECTION);

usage = ms->usage;
page = virt_to_page(usage);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 8debe5c5abce..080f130017e3 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -57,7 +57,7 @@ unsigned int default_hstate_idx;
struct hstate hstates[HUGE_MAX_HSTATE];

__initdata nodemask_t hugetlb_bootmem_nodes;
-__initdata struct list_head huge_boot_pages[MAX_NUMNODES];
+static __initdata struct list_head huge_boot_pages[MAX_NUMNODES];

/*
* Due to ordering constraints across the init code for various
@@ -3111,6 +3111,7 @@ static bool __init alloc_bootmem_huge_page(struct hstate *h, int nid)
} else {
list_add_tail(&m->list, &huge_boot_pages[nid]);
m->flags |= HUGE_BOOTMEM_ZONES_VALID;
+ hugetlb_vmemmap_optimize_bootmem_page(m);
/*
* Only initialize the head struct page in memmap_init_reserved_pages,
* rest of the struct pages will be initialized by the HugeTLB
@@ -3264,13 +3265,15 @@ static void __init gather_bootmem_prealloc_node(unsigned long nid)
OPTIMIZED_FOLIO_VMEMMAP_NR_STRUCT_PAGES);
init_new_hugetlb_folio(folio);

- if (hugetlb_bootmem_page_prehvo(m))
+ if (hugetlb_bootmem_page_prehvo(m)) {
/*
* If pre-HVO was done, just set the
* flag, the HVO code will then skip
* this folio.
*/
folio_set_hugetlb_vmemmap_optimized(folio);
+ section_set_order_range(folio_pfn(folio), folio_nr_pages(folio), 0);
+ }

if (hugetlb_bootmem_page_earlycma(m))
folio_set_hugetlb_cma(folio);
@@ -3314,25 +3317,6 @@ void __init hugetlb_struct_page_init(void)
.max_threads = num_node_state(N_MEMORY),
.numa_aware = true,
};
-#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
- struct zone *zone;
-
- for_each_zone(zone) {
- for (int i = 0; i < NR_OPTIMIZABLE_FOLIO_ORDERS; i++) {
- struct page *tail, *p;
- unsigned int order;
-
- tail = zone->vmemmap_tails[i];
- if (!tail)
- continue;
-
- order = i + OPTIMIZABLE_FOLIO_MIN_ORDER;
- p = page_to_virt(tail);
- for (int j = 0; j < PAGE_SIZE / sizeof(struct page); j++)
- init_compound_tail(p + j, NULL, order, zone);
- }
- }
-#endif

padata_do_multithreaded(&job);
}
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index 4367118f8f57..730190390ba9 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -16,6 +16,7 @@
#include <linux/mmdebug.h>
#include <linux/pagewalk.h>
#include <linux/pgalloc.h>
+#include <linux/io.h>

#include <asm/tlbflush.h>
#include "hugetlb_vmemmap.h"
@@ -478,12 +479,8 @@ long hugetlb_vmemmap_restore_folios(const struct hstate *h,
return ret;
}

-/* Return true iff a HugeTLB whose vmemmap should and can be optimized. */
-static bool vmemmap_should_optimize_folio(const struct hstate *h, struct folio *folio)
+static inline bool vmemmap_should_optimize(const struct hstate *h)
{
- if (folio_test_hugetlb_vmemmap_optimized(folio))
- return false;
-
if (!READ_ONCE(vmemmap_optimize_enabled))
return false;

@@ -493,6 +490,15 @@ static bool vmemmap_should_optimize_folio(const struct hstate *h, struct folio *
return true;
}

+/* Return true iff a HugeTLB whose vmemmap should and can be optimized. */
+static bool vmemmap_should_optimize_folio(const struct hstate *h, struct folio *folio)
+{
+ if (folio_test_hugetlb_vmemmap_optimized(folio))
+ return false;
+
+ return vmemmap_should_optimize(h);
+}
+
static struct page *vmemmap_get_tail(unsigned int order, struct zone *zone)
{
const unsigned int idx = order - OPTIMIZABLE_FOLIO_MIN_ORDER;
@@ -638,9 +644,6 @@ static void __hugetlb_vmemmap_optimize_folios(struct hstate *h,
epfn = spfn + hugetlb_vmemmap_size(h);
vmemmap_wrprotect_hvo(spfn, epfn, folio_nid(folio),
OPTIMIZED_FOLIO_VMEMMAP_SIZE);
- register_page_bootmem_memmap(pfn_to_section_nr(folio_pfn(folio)),
- &folio->page,
- OPTIMIZED_FOLIO_VMEMMAP_NR_STRUCT_PAGES);
continue;
}

@@ -706,111 +709,18 @@ void hugetlb_vmemmap_optimize_bootmem_folios(struct hstate *h, struct list_head
__hugetlb_vmemmap_optimize_folios(h, folio_list, true);
}

-#ifdef CONFIG_SPARSEMEM_VMEMMAP_PREINIT
-
-/* Return true of a bootmem allocated HugeTLB page should be pre-HVO-ed */
-static bool vmemmap_should_optimize_bootmem_page(struct huge_bootmem_page *m)
-{
- unsigned long section_size, psize, pmd_vmemmap_size;
- phys_addr_t paddr;
-
- if (!READ_ONCE(vmemmap_optimize_enabled))
- return false;
-
- if (!hugetlb_vmemmap_optimizable(m->hstate))
- return false;
-
- psize = huge_page_size(m->hstate);
- paddr = virt_to_phys(m);
-
- /*
- * Pre-HVO only works if the bootmem huge page
- * is aligned to the section size.
- */
- section_size = (1UL << PA_SECTION_SHIFT);
- if (!IS_ALIGNED(paddr, section_size) ||
- !IS_ALIGNED(psize, section_size))
- return false;
-
- /*
- * The pre-HVO code does not deal with splitting PMDS,
- * so the bootmem page must be aligned to the number
- * of base pages that can be mapped with one vmemmap PMD.
- */
- pmd_vmemmap_size = (PMD_SIZE / (sizeof(struct page))) << PAGE_SHIFT;
- if (!IS_ALIGNED(paddr, pmd_vmemmap_size) ||
- !IS_ALIGNED(psize, pmd_vmemmap_size))
- return false;
-
- return true;
-}
-
-static struct zone *pfn_to_zone(unsigned nid, unsigned long pfn);
-
-/*
- * Initialize memmap section for a gigantic page, HVO-style.
- */
-void __init hugetlb_vmemmap_init_early(int nid)
+void __init hugetlb_vmemmap_optimize_bootmem_page(struct huge_bootmem_page *m)
{
- unsigned long psize, paddr, section_size;
- unsigned long ns, i, pnum, pfn, nr_pages;
- unsigned long start, end;
- struct huge_bootmem_page *m = NULL;
- void *map;
+ struct hstate *h = m->hstate;
+ unsigned long pfn = PHYS_PFN(__pa(m));

- if (!READ_ONCE(vmemmap_optimize_enabled))
+ if (!vmemmap_should_optimize(h))
return;

- section_size = (1UL << PA_SECTION_SHIFT);
-
- list_for_each_entry(m, &huge_boot_pages[nid], list) {
- struct zone *zone;
-
- if (!vmemmap_should_optimize_bootmem_page(m))
- continue;
-
- nr_pages = pages_per_huge_page(m->hstate);
- psize = nr_pages << PAGE_SHIFT;
- paddr = virt_to_phys(m);
- pfn = PHYS_PFN(paddr);
- map = pfn_to_page(pfn);
- start = (unsigned long)map;
- end = start + hugetlb_vmemmap_size(m->hstate);
- zone = pfn_to_zone(nid, pfn);
-
- if (vmemmap_populate_hvo(start, end, huge_page_order(m->hstate),
- zone, OPTIMIZED_FOLIO_VMEMMAP_SIZE))
- panic("Failed to allocate memmap for HugeTLB page\n");
- memmap_boot_pages_add(OPTIMIZED_FOLIO_VMEMMAP_PAGES);
-
- pnum = pfn_to_section_nr(pfn);
- ns = psize / section_size;
-
- for (i = 0; i < ns; i++) {
- sparse_init_early_section(nid, map, pnum,
- SECTION_IS_VMEMMAP_PREINIT);
- map += section_map_size();
- pnum++;
- }
-
+ section_set_order_range(pfn, pages_per_huge_page(h), huge_page_order(h));
+ if (section_vmemmap_optimizable(__pfn_to_section(pfn)))
m->flags |= HUGE_BOOTMEM_HVO;
- }
-}
-
-static struct zone *pfn_to_zone(unsigned nid, unsigned long pfn)
-{
- struct zone *zone;
- enum zone_type zone_type;
-
- for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) {
- zone = &NODE_DATA(nid)->node_zones[zone_type];
- if (zone_spans_pfn(zone, pfn))
- return zone;
- }
-
- return NULL;
}
-#endif

static const struct ctl_table hugetlb_vmemmap_sysctls[] = {
{
diff --git a/mm/hugetlb_vmemmap.h b/mm/hugetlb_vmemmap.h
index 66e11893d076..0d8c88997066 100644
--- a/mm/hugetlb_vmemmap.h
+++ b/mm/hugetlb_vmemmap.h
@@ -9,8 +9,6 @@
#ifndef _LINUX_HUGETLB_VMEMMAP_H
#define _LINUX_HUGETLB_VMEMMAP_H
#include <linux/hugetlb.h>
-#include <linux/io.h>
-#include <linux/memblock.h>

#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
int hugetlb_vmemmap_restore_folio(const struct hstate *h, struct folio *folio);
@@ -20,10 +18,7 @@ long hugetlb_vmemmap_restore_folios(const struct hstate *h,
void hugetlb_vmemmap_optimize_folio(const struct hstate *h, struct folio *folio);
void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list);
void hugetlb_vmemmap_optimize_bootmem_folios(struct hstate *h, struct list_head *folio_list);
-#ifdef CONFIG_SPARSEMEM_VMEMMAP_PREINIT
-void hugetlb_vmemmap_init_early(int nid);
-#endif
-
+void hugetlb_vmemmap_optimize_bootmem_page(struct huge_bootmem_page *m);

static inline unsigned int hugetlb_vmemmap_size(const struct hstate *h)
{
@@ -69,13 +64,13 @@ static inline void hugetlb_vmemmap_optimize_bootmem_folios(struct hstate *h,
{
}

-static inline void hugetlb_vmemmap_init_early(int nid)
+static inline unsigned int hugetlb_vmemmap_optimizable_size(const struct hstate *h)
{
+ return 0;
}

-static inline unsigned int hugetlb_vmemmap_optimizable_size(const struct hstate *h)
+static inline void hugetlb_vmemmap_optimize_bootmem_page(struct huge_bootmem_page *m)
{
- return 0;
}
#endif /* CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP */

diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 69ae40692e41..b86634903fc0 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -32,7 +32,6 @@
#include <asm/dma.h>
#include <asm/tlbflush.h>

-#include "hugetlb_vmemmap.h"
#include "internal.h"

/*
@@ -372,33 +371,6 @@ static __meminit struct page *vmemmap_get_tail(unsigned int order, struct zone *
return tail;
}

-#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
-int __meminit vmemmap_populate_hvo(unsigned long addr, unsigned long end,
- unsigned int order, struct zone *zone,
- unsigned long headsize)
-{
- unsigned long maddr;
- struct page *tail;
- pte_t *pte;
- int node = zone_to_nid(zone);
-
- tail = vmemmap_get_tail(order, zone);
- if (!tail)
- return -ENOMEM;
-
- for (maddr = addr; maddr < addr + headsize; maddr += PAGE_SIZE) {
- pte = vmemmap_populate_address(maddr, node, NULL, -1);
- if (!pte)
- return -ENOMEM;
- }
-
- /*
- * Reuse the last page struct page mapped above for the rest.
- */
- return vmemmap_populate_range(maddr, end, node, NULL, page_to_pfn(tail));
-}
-#endif
-
void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
unsigned long addr, unsigned long next)
{
@@ -600,7 +572,6 @@ struct page * __meminit __populate_section_memmap(unsigned long pfn,
*/
void __init sparse_vmemmap_init_nid_early(int nid)
{
- hugetlb_vmemmap_init_early(nid);
}
#endif

--
2.54.0