Re: [PATCH v5 1/5] mm: move mirrored memory overlap checking to the outer loop
From: Wei Yang
Date: Mon May 25 2026 - 04:36:37 EST
On Fri, May 22, 2026 at 07:43:38AM +0000, Liu, Yuan1 wrote:
>> Subject: Re: [PATCH v5 1/5] mm: move mirrored memory overlap checking to
>> the outer loop
>>
>> On Wed, May 20, 2026 at 05:34:53AM -0400, Yuan Liu wrote:
>> >Move the overlap memmap initialization check from memmap_init_range()
>> >to memmap_init(), and replace the per-PFN check with a memblock-based
>> >check.
>>
>> The description is a little simple.
>>
>> Even I know the purpose, I feel confused at the first glance.
>
>Thanks for the review.
>I will try to rephrase it and provide a clearer description in the next version.
>
>> >
>> >Reviewed-by: Wei Yang <richard.weiyang@xxxxxxxxx>
>> >Reviewed-by: Jason Zeng <jason.zeng@xxxxxxxxx>
>> >Signed-off-by: Yuan Liu <yuan1.liu@xxxxxxxxx>
>> >---
>> > mm/mm_init.c | 29 +++++------------------------
>> > 1 file changed, 5 insertions(+), 24 deletions(-)
>> >
>> >diff --git a/mm/mm_init.c b/mm/mm_init.c
>> >index f9f8e1af921c..24e103a402b0 100644
>> >--- a/mm/mm_init.c
>> >+++ b/mm/mm_init.c
>> >@@ -783,28 +783,6 @@ void __meminit init_deferred_page(unsigned long pfn,
>> int nid)
>> > __init_deferred_page(pfn, nid);
>> > }
>> >
>> >-/* If zone is ZONE_MOVABLE but memory is mirrored, it is an overlapped
>> init */
>> >-static bool __meminit
>> >-overlap_memmap_init(unsigned long zone, unsigned long *pfn)
>> >-{
>> >- static struct memblock_region *r __meminitdata;
>> >-
>> >- if (mirrored_kernelcore && zone == ZONE_MOVABLE) {
>> >- if (!r || *pfn >= memblock_region_memory_end_pfn(r)) {
>> >- for_each_mem_region(r) {
>> >- if (*pfn < memblock_region_memory_end_pfn(r))
>> >- break;
>> >- }
>> >- }
>> >- if (*pfn >= memblock_region_memory_base_pfn(r) &&
>> >- memblock_is_mirror(r)) {
>> >- *pfn = memblock_region_memory_end_pfn(r);
>> >- return true;
>> >- }
>> >- }
>> >- return false;
>> >-}
>> >-
>> > /*
>> > * Only struct pages that correspond to ranges defined by
>> memblock.memory
>> > * are zeroed and initialized by going through __init_single_page()
>> during
>> >@@ -891,8 +869,6 @@ void __meminit memmap_init_range(unsigned long size,
>> int nid, unsigned long zone
>> > * function. They do not exist on hotplugged memory.
>> > */
>> > if (context == MEMINIT_EARLY) {
>> >- if (overlap_memmap_init(zone, &pfn))
>> >- continue;
>> > if (defer_init(nid, pfn, zone_end_pfn)) {
>> > deferred_struct_pages = true;
>> > break;
>> >@@ -956,6 +932,7 @@ static void __init memmap_init(void)
>> > int i, j, zone_id = 0, nid;
>> >
>> > for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
>> {
>> >+ struct memblock_region *r = &memblock.memory.regions[i];
>> > struct pglist_data *node = NODE_DATA(nid);
>> >
>> > for (j = 0; j < MAX_NR_ZONES; j++) {
>> >@@ -964,6 +941,10 @@ static void __init memmap_init(void)
>> > if (!populated_zone(zone))
>> > continue;
>> >
>> >+ if (mirrored_kernelcore && j == ZONE_MOVABLE &&
>> >+ memblock_is_mirror(r))
>> >+ continue;
>> >+
>>
>> So you have figured out the memory layout of mirror memory?
>>
>> Would you mind elaborate?
>
>I have a Xeon server, collected mirror memory layout information as below by using follow changes:
>
>int __init_memblock memblock_mark_mirror(phys_addr_t base, phys_addr_t size)
> {
>- if (!mirrored_kernelcore)
>+ int ret;
>+ phys_addr_t end = base + size - 1;
>+
>+ pr_info("memblock_mark_mirror: base=%pa, size=%pa, mirrored_kernelcore=%d\n",
>+ &base, &size, mirrored_kernelcore);
>+
>+ if (!mirrored_kernelcore) {
>+ pr_info("memblock_mark_mirror: mirrored_kernelcore not enabled, skipping\n");
> return 0;
>+ }
>
> system_has_some_mirror = true;
>
>- return memblock_setclr_flag(&memblock.memory, base, size, 1, MEMBLOCK_MIRROR);
>+ ret = memblock_setclr_flag(&memblock.memory, base, size, 1, MEMBLOCK_MIRROR);
>+ pr_info("memblock_mark_mirror: marked [%pa-%pa] as MIRROR, ret=%d\n",
>+ &base, &end, ret);
>+ return ret;
> }
>
>Here is the detailed layout information:
>
Thanks for the detailed output.
After some investigation, I have two things to discuss here:
* mirror memory currently disables memmap deferred init
* confirming the mirror memory layout, which may let us simplify the handling
Before the discussion, let me mark the key points in the output below.
>Case 1: efibootmgr -m t -M 0
>Enable mirror memory below 4GB, and no mirror memory above 4G
>
>=== zoneinfo summary ===
>node zone start_pfn end_pfn start_addr end_addr
>------ ---------- ------------ ------------ ------------------ -------------
>0 DMA 0x1 0xfff 0x1000 0xffffff
>0 DMA32 0x1000 0xfffff 0x1000000 0xffffffff
>0 Normal 0x100000 0x7fbffff 0x100000000 0x7fbfffffff
>0 Movable 0x140000 0x7fbffff 0x140000000 0x7fbfffffff
(1) The Normal and Movable zones end at the same address, so they overlap.
>1 Normal 0x7fc0000 0xff7ffff 0x7fc0000000 0xff7fffffff
>1 Movable 0x8000000 0xff7ffff 0x8000000000 0xff7fffffff
The same as (1).
>
>node start end size flags mirror pfn_range
>---- -------------------- -------------------- ------------ --------------- -------- ------------------
>0 0x0000000000001000 0x000000000009dfff 0x000000000009d000 0x2 yes 0x1-0x9e
>0 0x000000000009f000 0x000000000009ffff 0x0000000000001000 0x2 yes 0x9f-0xa0
>0 0x0000000000100000 0x0000000066416fff 0x0000000066317000 0x2 yes 0x100-0x66417
>0 0x00000000777ff000 0x00000000777fffff 0x0000000000001000 0x2 yes 0x777ff-0x77800
>0 0x0000000100000000 0x000000013fffffff 0x0000000040000000 0x2 yes 0x100000-0x140000
(2) You mentioned there is no mirror memory above 4G. If my understanding is
correct, the 4G address is 0x100000000. So why is this range marked as mirror?
Is there some limitation behind this?
>0 0x0000000140000000 0x0000007fbfffffff 0x0000007e80000000 0x0 no 0x140000-0x7fc0000
>1 0x0000007fc0000000 0x0000007fffffffff 0x0000000040000000 0x2 yes 0x7fc0000-0x8000000
>1 0x0000008000000000 0x000000ff7fffffff 0x0000007f80000000 0x0 no 0x8000000-0xff80000
>
>Case 2: efibootmgr -m t -M 25
>Enable mirror memory below 4GB, and put 25% percentage memory to mirror above 4GB
>
>=== zoneinfo summary ===
>node zone start_pfn end_pfn start_addr end_addr
>------ ---------- ------------ ------------ ------------------ -----------
>0 DMA 0x1 0xfff 0x1000 0xffffff
>0 DMA32 0x1000 0xfffff 0x1000000 0xffffffff
>0 Normal 0x100000 0x603ffff 0x100000000 0x603fffffff
>0 Movable 0x20c0000 0x603ffff 0x20c0000000 0x603fffffff
The same as (1).
>1 Normal 0x6040000 0xc03ffff 0x6040000000 0xc03fffffff
>1 Movable 0x8040000 0xc03ffff 0x8040000000 0xc03fffffff
The same as (1).
>
>node start end size flags mirror pfn_range
>---- -------------------- -------------------- ------------ ---------- -------- ------------------
>0 0x0000000000001000 0x000000000009dfff 0x000000000009d000 0x2 yes 0x1-0x9e
>0 0x000000000009f000 0x000000000009ffff 0x0000000000001000 0x2 yes 0x9f-0xa0
>0 0x0000000000100000 0x0000000066416fff 0x0000000066317000 0x2 yes 0x100-0x66417
>0 0x00000000777ff000 0x00000000777fffff 0x0000000000001000 0x2 yes 0x777ff-0x77800
>0 0x0000000100000000 0x00000020bfffffff 0x0000001fc0000000 0x2 yes 0x100000-0x20c0000
>0 0x00000020c0000000 0x000000603fffffff 0x0000003f80000000 0x0 no 0x20c0000-0x6040000
>1 0x0000006040000000 0x000000803fffffff 0x0000002000000000 0x2 yes 0x6040000-0x8040000
>1 0x0000008040000000 0x000000c03fffffff 0x0000004000000000 0x0 no 0x8040000-0xc040000
>
>Case 3: efibootmgr -m f -M 25
>Disable mirror memory below 4GB, and put 25% percentage memory to mirror above 4GB
>
>=== zoneinfo summary ===
>node zone start_pfn end_pfn start_addr end_addr
>------ ---------- ------------ ------------ ------------------ ------------
>0 DMA 0x1 0xfff 0x1000 0xffffff
>0 DMA32 0x1000 0xfffff 0x1000000 0xffffffff
>0 Normal 0x100000 0x60bffff 0x100000000 0x60bfffffff
>0 Movable 0x20c0000 0x60bffff 0x20c0000000 0x60bfffffff
>1 Normal 0x60c0000 0xc0bffff 0x60c0000000 0xc0bfffffff
>1 Movable 0x80c0000 0xc0bffff 0x80c0000000 0xc0bfffffff
>
>node start end size flags mirror pfn_range
>---- -------------------- -------------------- ------------ ---------- -------- ------------------
>0 0x0000000000001000 0x000000000009dfff 0x000000000009d000 0x0 no 0x1-0x9e
>0 0x000000000009f000 0x000000000009ffff 0x0000000000001000 0x0 no 0x9f-0xa0
>0 0x0000000000100000 0x0000000066416fff 0x0000000066317000 0x0 no 0x100-0x66417
>0 0x00000000777ff000 0x00000000777fffff 0x0000000000001000 0x0 no 0x777ff-0x77800
>0 0x0000000100000000 0x00000020bfffffff 0x0000001fc0000000 0x2 yes 0x100000-0x20c0000
>0 0x00000020c0000000 0x00000060bfffffff 0x0000004000000000 0x0 no 0x20c0000-0x60c0000
>1 0x00000060c0000000 0x00000080bfffffff 0x0000002000000000 0x2 yes 0x60c0000-0x80c0000
>1 0x00000080c0000000 0x000000c0bfffffff 0x0000004000000000 0x0 no 0x80c0000-0xc0c0000
#1 mirror memory would disable memmap defer init
I hacked my kernel to behave like Case 2 here, and found that defer_init() is
skipped.
The reason comes from (1): both the Normal and Movable zones end at the same address.
But what actually happens is tricky:
* calculate_node_totalpages() sums the spanned pages of every zone, so the
overlapped range is counted more than once and node_spanned_pages is too large
* defer_init() skips the lower zones by checking end_pfn against pgdat_end_pfn(),
which is then far away from the real value
The node_spanned_pages calculation in calculate_node_totalpages() is easy to fix:
diff --git a/mm/mm_init.c b/mm/mm_init.c
index db5568cf36e1..8f353d8dde3b 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1334,7 +1334,7 @@ static void __init calculate_node_totalpages(struct pglist_data *pgdat,
unsigned long node_start_pfn,
unsigned long node_end_pfn)
{
- unsigned long realtotalpages = 0, totalpages = 0;
+ unsigned long realtotalpages = 0;
enum zone_type i;
for (i = 0; i < MAX_NR_ZONES; i++) {
@@ -1364,11 +1364,10 @@ static void __init calculate_node_totalpages(struct pglist_data *pgdat,
zone->present_early_pages = real_size;
#endif
- totalpages += spanned;
realtotalpages += real_size;
}
- pgdat->node_spanned_pages = totalpages;
+ pgdat->node_spanned_pages = node_end_pfn - node_start_pfn;
pgdat->node_present_pages = realtotalpages;
pr_debug("On node %d totalpages: %lu\n", pgdat->node_id, realtotalpages);
}
But after this, defer_init() does too much: it starts deferring from
ZONE_NORMAL, which is not the last zone.
To fix this, see below.
#2 confirm mirror memory layout and may simplify handling
In [1], I listed three possible cases for the mirror memory layout. According to
the test from Yuan and the output here, case C does not seem possible.
[1]: https://lkml.org/2026/4/24/90
But case A does not show up in the output. I expected Case 1 here to
behave like case A, but as I mentioned at (2), I don't know why memory above
4G is still mirror memory when it says "no mirror memory above 4G".
At least, we can conclude:
ZONE_MOVABLE only sits inside ZONE_NORMAL, without interleaving
With this knowledge, we may simplify the current handling:
Remove the possible overlap between ZONE_NORMAL and ZONE_MOVABLE
As ZONE_MOVABLE only sits inside ZONE_NORMAL without interleaving, these two
zones do not actually overlap. We still see (1) above only because we don't
adjust ZONE_NORMAL when mirrored_kernelcore is set.
So we can simply do the following, and then the two zones no longer overlap.
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 8f353d8dde3b..16cc42c3ad12 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1170,8 +1170,7 @@ static void __init adjust_zone_range_for_zone_movable(int nid,
arch_zone_highest_possible_pfn[movable_zone]);
/* Adjust for ZONE_MOVABLE starting within this range */
- } else if (!mirrored_kernelcore &&
- *zone_start_pfn < zone_movable_pfn[nid] &&
+ } else if (*zone_start_pfn < zone_movable_pfn[nid] &&
*zone_end_pfn > zone_movable_pfn[nid]) {
*zone_end_pfn = zone_movable_pfn[nid];
After this, the problem left above for memmap defer_init() is solved: only the
last zone does deferred init.
Also, the other special handling for mirror memory can then be removed,
including the absent pages calculation and the overlap_memmap_init() in this patch.
I have boot-tested the two changes below, and they look good.
Remove the absent pages calculation:
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 16cc42c3ad12..a154053a37db 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1219,40 +1219,11 @@ static unsigned long __init zone_absent_pages_in_node(int nid,
unsigned long zone_start_pfn,
unsigned long zone_end_pfn)
{
- unsigned long nr_absent;
-
/* zone is empty, we don't have any absent pages */
if (zone_start_pfn == zone_end_pfn)
return 0;
- nr_absent = __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn);
-
- /*
- * ZONE_MOVABLE handling.
- * Treat pages to be ZONE_MOVABLE in ZONE_NORMAL as absent pages
- * and vice versa.
- */
- if (mirrored_kernelcore && zone_movable_pfn[nid]) {
- unsigned long start_pfn, end_pfn;
- struct memblock_region *r;
-
- for_each_mem_region(r) {
- start_pfn = clamp(memblock_region_memory_base_pfn(r),
- zone_start_pfn, zone_end_pfn);
- end_pfn = clamp(memblock_region_memory_end_pfn(r),
- zone_start_pfn, zone_end_pfn);
-
- if (zone_type == ZONE_MOVABLE &&
- memblock_is_mirror(r))
- nr_absent += end_pfn - start_pfn;
-
- if (zone_type == ZONE_NORMAL &&
- !memblock_is_mirror(r))
- nr_absent += end_pfn - start_pfn;
- }
- }
-
- return nr_absent;
+ return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn);
}
And remove overlap_memmap_init() directly:
diff --git a/mm/mm_init.c b/mm/mm_init.c
index a154053a37db..01c6920fefa1 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -905,8 +905,8 @@ void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone
* function. They do not exist on hotplugged memory.
*/
if (context == MEMINIT_EARLY) {
- if (overlap_memmap_init(zone, &pfn))
- continue;
if (defer_init(nid, pfn, zone_end_pfn)) {
deferred_struct_pages = true;
break;
So if the only mirror memory layouts we have are the ones Yuan shows, we may
simplify the code to some extent.
I hope I have not misunderstood the case. Looking forward to insights from others.
--
Wei Yang
Help you, Help me