[PATCH v2 4/7] mm: extend the template fast path to zone-device compound tails

From: Li Zhe

Date: Thu May 21 2026 - 00:05:08 EST


The template fast path from the previous patch only accelerates head
pages. Compound tails in memmap_init_compound() still go through the
slow path one by one.

Build separate head and tail templates and reuse one prepared tail
template across the tail pages in a compound range. Head pages keep the
zone_device_page_init_refcount() policy, while compound tails always
start with a refcount of 0 after prep_compound_tail().

This extends the template-copy fast path to pfns_per_compound > 1
without changing the existing slow path.

Tested in a VM with a 100 GB devdax namespace (align=2097152) on an
Intel Ice Lake server. This test exercises the dax_pmem rebind path and
measures memmap initialization latency.

Test procedure:
Unbind and rebind the dax_pmem driver 30 times, and collect the memmap
initialization time from the pr_debug() output of memmap_init_zone_device().

Base (v7.1-rc3):
First binding: 1515 ms
Average of subsequent rebinds: 313.45 ms

With patches 1-4 applied:
First binding: 1422 ms
Average of subsequent rebinds: 256.56 ms

This reduces the average rebind time from 313.45 ms to 256.56 ms, or
about 18.1%.

Signed-off-by: Li Zhe <lizhe.67@xxxxxxxxxxxxx>
---
mm/mm_init.c | 51 +++++++++++++++++++++++++++++++++++++++------------
1 file changed, 39 insertions(+), 12 deletions(-)

diff --git a/mm/mm_init.c b/mm/mm_init.c
index 2992711351a0..17a84d4cda01 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1084,17 +1084,25 @@ static inline bool zone_device_page_init_optimization_enabled(void)
IS_ALIGNED(sizeof(struct page), sizeof(u64));
}

-static inline void zone_device_template_page_init(struct page *template,
- unsigned long pfn,
- unsigned long zone_idx,
- int nid,
- struct dev_pagemap *pgmap)
+static inline void zone_device_template_head_page_init(struct page *template,
+ unsigned long pfn, unsigned long zone_idx, int nid,
+ struct dev_pagemap *pgmap)
{
__zone_device_page_init(template, pfn, zone_idx, nid, pgmap);
if (!zone_device_page_init_refcount(pgmap))
set_page_count(template, 0);
}

+static inline void zone_device_template_tail_page_init(struct page *template,
+ unsigned long pfn, unsigned long zone_idx, int nid,
+ struct dev_pagemap *pgmap, const struct page *head,
+ unsigned int order)
+{
+ __zone_device_page_init(template, pfn, zone_idx, nid, pgmap);
+ prep_compound_tail(template, head, order);
+ set_page_count(template, 0);
+}
+
/*
* The copied template already provides the PFN-invariant portion of a
* ZONE_DEVICE struct page. Fix up the fields that still depend on @pfn
@@ -1144,10 +1152,12 @@ static void __ref memmap_init_compound(struct page *head,
unsigned long head_pfn,
unsigned long zone_idx, int nid,
struct dev_pagemap *pgmap,
- unsigned long nr_pages)
+ unsigned long nr_pages,
+ bool use_template)
{
unsigned long pfn, end_pfn = head_pfn + nr_pages;
unsigned int order = pgmap->vmemmap_shift;
+ struct page template;

/*
* We have to initialize the pages, including setting up page links.
@@ -1156,12 +1166,28 @@ static void __ref memmap_init_compound(struct page *head,
* the pages in the same go.
*/
__SetPageHead(head);
+
+ /*
+ * A tail template can be reused for all tail pages in the same compound page
+ * because shared state for compound tails is pre-set by prep_compound_tail().
+ * The per-page page->virtual and section in flags are fixed up after copying.
+ */
+ if (use_template)
+ zone_device_template_tail_page_init(&template, head_pfn + 1,
+ zone_idx, nid, pgmap,
+ head, order);
+
for (pfn = head_pfn + 1; pfn < end_pfn; pfn++) {
struct page *page = pfn_to_page(pfn);

- zone_device_page_init_slow(page, pfn, zone_idx, nid, pgmap);
- prep_compound_tail(page, head, order);
- set_page_count(page, 0);
+ if (use_template) {
+ zone_device_page_init_from_template(page, pfn,
+ &template);
+ } else {
+ zone_device_page_init_slow(page, pfn, zone_idx, nid, pgmap);
+ prep_compound_tail(page, head, order);
+ set_page_count(page, 0);
+ }
}
prep_compound_head(head, order);
}
@@ -1195,8 +1221,8 @@ void __ref memmap_init_zone_device(struct zone *zone,
}

if (use_template)
- zone_device_template_page_init(&template, start_pfn, zone_idx,
- nid, pgmap);
+ zone_device_template_head_page_init(&template, start_pfn,
+ zone_idx, nid, pgmap);

for (pfn = start_pfn; pfn < end_pfn; pfn += pfns_per_compound) {
struct page *page = pfn_to_page(pfn);
@@ -1212,7 +1238,8 @@ void __ref memmap_init_zone_device(struct zone *zone,
continue;

memmap_init_compound(page, pfn, zone_idx, nid, pgmap,
- compound_nr_pages(altmap, pgmap));
+ compound_nr_pages(altmap, pgmap),
+ use_template);
}

pr_debug("%s initialised %lu pages in %ums\n", __func__,
--
2.20.1