Re: [PATCH v9 2/2] mm/memory hotplug/unplug: Optimize zone->contiguous update when the pfn range changes
From: David Hildenbrand (Arm)
Date: Wed Feb 11 2026 - 07:20:11 EST
*
+ * online_pages is the number of pages within the zone that have an
+ * online memmap. online_pages includes present pages as well as memory
+ * holes that have a memmap. When spanned_pages == online_pages,
+ * pfn_to_page() can be performed without further checks on any pfn
+ * within the zone span.
Maybe pages_with_memmap? It would stand apart from managed, spanned and
present, but it's clearer than online IMHO.
Offline pages also have a memmap, but that memmap should not be touched as it might contain garbage. So it's a bit more tricky :)
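To illustrate (untested; example_pfn_walk() is made up just for this discussion): pfn walkers have to go through pfn_to_online_page() precisely because a bare pfn_to_page() can return an uninitialized memmap entry for an offline section:

static void example_pfn_walk(unsigned long start_pfn, unsigned long nr_pages)
{
	unsigned long pfn;
	struct page *page;

	for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
		/*
		 * pfn_to_online_page() checks SECTION_IS_ONLINE and
		 * returns NULL for offline sections, whose memmap may
		 * contain garbage.
		 */
		page = pfn_to_online_page(pfn);
		if (!page)
			continue;
		/* Only here is it safe to dereference the struct page. */
	}
}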
+ *
* So present_pages may be used by memory hotplug or memory power
* management logic to figure out unmanaged pages by checking
* (present_pages - managed_pages). And managed_pages should be used
@@ -967,6 +972,7 @@ struct zone {
atomic_long_t managed_pages;
unsigned long spanned_pages;
unsigned long present_pages;
+ unsigned long online_pages;
#if defined(CONFIG_MEMORY_HOTPLUG)
unsigned long present_early_pages;
#endif
@@ -1051,8 +1057,6 @@ struct zone {
bool compact_blockskip_flush;
#endif
- bool contiguous;
-
CACHELINE_PADDING(_pad3_);
/* Zone statistics */
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
@@ -1124,6 +1128,23 @@ static inline bool zone_spans_pfn(const struct zone *zone, unsigned long pfn)
return zone->zone_start_pfn <= pfn && pfn < zone_end_pfn(zone);
}
+/**
+ * zone_is_contiguous - test whether a zone is contiguous
+ * @zone: the zone to test.
+ *
+ * In a contiguous zone, it is valid to call pfn_to_page() on any pfn in
+ * the zone span without requiring pfn_valid() or pfn_to_online_page()
+ * checks.
+ *
+ * Return: true if contiguous, otherwise false.
+ */
+static inline bool zone_is_contiguous(const struct zone *zone)
+{
+ return READ_ONCE(zone->spanned_pages) == READ_ONCE(zone->online_pages);
+}
+
static inline bool zone_is_initialized(const struct zone *zone)
{
return zone->initialized;
diff --git a/mm/internal.h b/mm/internal.h
index f35dbcf99a86..6062f9b8ee62 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -716,21 +716,15 @@ extern struct page *__pageblock_pfn_to_page(unsigned long start_pfn,
static inline struct page *pageblock_pfn_to_page(unsigned long start_pfn,
unsigned long end_pfn, struct zone *zone)
{
- if (zone->contiguous)
+ if (zone_is_contiguous(zone))
return pfn_to_page(start_pfn);
return __pageblock_pfn_to_page(start_pfn, end_pfn, zone);
}
-void set_zone_contiguous(struct zone *zone);
bool pfn_range_intersects_zones(int nid, unsigned long start_pfn,
unsigned long nr_pages);
-static inline void clear_zone_contiguous(struct zone *zone)
-{
- zone->contiguous = false;
-}
-
extern int __isolate_free_page(struct page *page, unsigned int order);
extern void __putback_isolated_page(struct page *page, unsigned int order,
int mt);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index a63ec679d861..76496c1039a9 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -492,11 +492,11 @@ static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
pfn = find_smallest_section_pfn(nid, zone, end_pfn,
zone_end_pfn(zone));
if (pfn) {
- zone->spanned_pages = zone_end_pfn(zone) - pfn;
+ WRITE_ONCE(zone->spanned_pages, zone_end_pfn(zone) - pfn);
zone->zone_start_pfn = pfn;
} else {
zone->zone_start_pfn = 0;
- zone->spanned_pages = 0;
+ WRITE_ONCE(zone->spanned_pages, 0);
}
} else if (zone_end_pfn(zone) == end_pfn) {
/*
@@ -508,10 +508,10 @@ static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
pfn = find_biggest_section_pfn(nid, zone, zone->zone_start_pfn,
start_pfn);
if (pfn)
- zone->spanned_pages = pfn - zone->zone_start_pfn + 1;
+ WRITE_ONCE(zone->spanned_pages, pfn - zone->zone_start_pfn + 1);
else {
zone->zone_start_pfn = 0;
- zone->spanned_pages = 0;
+ WRITE_ONCE(zone->spanned_pages, 0);
}
}
}
@@ -565,18 +565,13 @@ void remove_pfn_range_from_zone(struct zone *zone,
/*
* Zone shrinking code cannot properly deal with ZONE_DEVICE. So
- * we will not try to shrink the zones - which is okay as
- * set_zone_contiguous() cannot deal with ZONE_DEVICE either way.
+ * we will not try to shrink the zones.
*/
if (zone_is_zone_device(zone))
return;
- clear_zone_contiguous(zone);
-
shrink_zone_span(zone, start_pfn, start_pfn + nr_pages);
update_pgdat_span(pgdat);
-
- set_zone_contiguous(zone);
}
/**
@@ -753,8 +748,6 @@ void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
struct pglist_data *pgdat = zone->zone_pgdat;
int nid = pgdat->node_id;
- clear_zone_contiguous(zone);
-
if (zone_is_empty(zone))
init_currently_empty_zone(zone, start_pfn, nr_pages);
resize_zone_range(zone, start_pfn, nr_pages);
@@ -782,8 +775,6 @@ void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
memmap_init_range(nr_pages, nid, zone_idx(zone), start_pfn, 0,
MEMINIT_HOTPLUG, altmap, migratetype,
isolate_pageblock);
-
- set_zone_contiguous(zone);
}
struct auto_movable_stats {
@@ -1079,6 +1070,7 @@ void adjust_present_page_count(struct page *page, struct memory_group *group,
if (early_section(__pfn_to_section(page_to_pfn(page))))
zone->present_early_pages += nr_pages;
zone->present_pages += nr_pages;
+ WRITE_ONCE(zone->online_pages, zone->online_pages + nr_pages);
zone->zone_pgdat->node_present_pages += nr_pages;
if (group && movable)
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 2a809cd8e7fa..e33caa6fb6fc 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -2263,9 +2263,10 @@ void __init init_cma_pageblock(struct page *page)
}
#endif
-void set_zone_contiguous(struct zone *zone)
+static void calc_online_pages(struct zone *zone)
{
unsigned long block_start_pfn = zone->zone_start_pfn;
+ unsigned long online_pages = 0;
unsigned long block_end_pfn;
block_end_pfn = pageblock_end_pfn(block_start_pfn);
@@ -2277,12 +2278,11 @@ void set_zone_contiguous(struct zone *zone)
if (!__pageblock_pfn_to_page(block_start_pfn,
block_end_pfn, zone))
- return;
+ continue;
cond_resched();
+ online_pages += block_end_pfn - block_start_pfn;
I think we can get rid of this walk completely with something like the
following untested patch, which calculates zone->online_pages during
coldplug:
diff --git a/mm/mm_init.c b/mm/mm_init.c
index e33caa6fb6fc..ff2f75e7b49f 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -845,9 +845,9 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn)
* zone/node above the hole except for the trailing pages in the last
* section that will be appended to the zone/node below.
*/
-static void __init init_unavailable_range(unsigned long spfn,
- unsigned long epfn,
- int zone, int node)
+static u64 __init init_unavailable_range(unsigned long spfn,
+ unsigned long epfn,
+ int zone, int node)
{
unsigned long pfn;
u64 pgcnt = 0;
@@ -861,6 +861,8 @@ static void __init init_unavailable_range(unsigned long spfn,
if (pgcnt)
pr_info("On node %d, zone %s: %lld pages in unavailable ranges\n",
node, zone_names[zone], pgcnt);
+
+ return pgcnt;
}
/*
@@ -959,9 +961,10 @@ static void __init memmap_init_zone_range(struct zone *zone,
memmap_init_range(end_pfn - start_pfn, nid, zone_id, start_pfn,
zone_end_pfn, MEMINIT_EARLY, NULL, MIGRATE_MOVABLE,
false);
+ zone->online_pages += (end_pfn - start_pfn);
if (*hole_pfn < start_pfn)
- init_unavailable_range(*hole_pfn, start_pfn, zone_id, nid);
+ zone->online_pages += init_unavailable_range(*hole_pfn, start_pfn, zone_id, nid);
*hole_pfn = end_pfn;
}
Looking at set_zone_contiguous(), __pageblock_pfn_to_page() takes care of a weird case where the end of a zone falls into the middle of a pageblock.
I am not even sure whether that is possible, but we could handle it easily in pageblock_pfn_to_page() by checking the requested range against the zone's spanned range, roughly like the sketch below.
Then the semantics of "zone->online_pages" would be less weird and would more closely resemble "pages with online memmap".
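Completely untested sketch, reusing the existing zone_spans_pfn() helper (end_pfn is exclusive here, as it is for __pageblock_pfn_to_page()):

static inline struct page *pageblock_pfn_to_page(unsigned long start_pfn,
		unsigned long end_pfn, struct zone *zone)
{
	/*
	 * In a contiguous zone, checking the requested range against
	 * the zone span catches a zone end in the middle of a
	 * pageblock, so __pageblock_pfn_to_page() is only needed for
	 * non-contiguous zones.
	 */
	if (zone_is_contiguous(zone) && zone_spans_pfn(zone, start_pfn) &&
	    zone_spans_pfn(zone, end_pfn - 1))
		return pfn_to_page(start_pfn);
	return __pageblock_pfn_to_page(start_pfn, end_pfn, zone);
}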
init_unavailable_range() might indeed do the trick!
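If we go down that route, a trivial boot-time sanity check could confirm the accounting (untested; check_online_pages_accounting() is a made-up helper just for this discussion):

static void __init check_online_pages_accounting(void)
{
	struct zone *zone;

	/*
	 * Every spanned pfn is either backed by a memblock range or lies
	 * in a hole whose memmap was initialized by
	 * init_unavailable_range(), so online_pages can never exceed
	 * spanned_pages; equality is exactly what zone_is_contiguous()
	 * tests.
	 */
	for_each_populated_zone(zone)
		WARN_ON_ONCE(READ_ONCE(zone->online_pages) >
			     READ_ONCE(zone->spanned_pages));
}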
@Tianyou, can you explore that direction? I know your PTO is coming up.
--
Cheers,
David