[RFC PATCH 4/7] mm/memblock: Add MEMBLOCK_PRSRV flag

From: Andrey Ryabinin
Date: Wed Oct 02 2024 - 12:09:53 EST


Add a MEMBLOCK_PRSRV flag indicating that we don't need to initialize
'struct page' at all. The flag will be used in the following patches
to mark memory intended to be kept intact across kexec.
The 'struct page' for such a region is assumed to have been initialized
by the old kernel, so the new one shouldn't touch it.

This is only an initial RFC sketch, in which we assume that the
'struct page' layout doesn't change between the old and new kernels.
The proper solution would require some form of migration from the old
'struct page' to the new one if the layout did change.

Signed-off-by: Andrey Ryabinin <arbn@xxxxxxxxxxxxxxx>
---
include/linux/memblock.h | 7 +++++++
mm/memblock.c | 9 ++++++++-
mm/mm_init.c | 19 +++++++++++++++++++
3 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 673d5cae7c813..b3c6029b03624 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -50,6 +50,7 @@ enum memblock_flags {
MEMBLOCK_NOMAP = 0x4, /* don't add to kernel direct mapping */
MEMBLOCK_DRIVER_MANAGED = 0x8, /* always detected via a driver */
MEMBLOCK_RSRV_NOINIT = 0x10, /* don't initialize struct pages */
+ MEMBLOCK_PRSRV = 0x20, /* struct page preserved during kexec, don't initialize */
};

/**
@@ -132,6 +133,7 @@ int memblock_mark_mirror(phys_addr_t base, phys_addr_t size);
int memblock_mark_nomap(phys_addr_t base, phys_addr_t size);
int memblock_clear_nomap(phys_addr_t base, phys_addr_t size);
int memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t size);
+int memblock_reserved_mark_preserved(phys_addr_t base, phys_addr_t size);

void memblock_free_all(void);
void memblock_free(void *ptr, size_t size);
@@ -271,6 +273,11 @@ static inline bool memblock_is_reserved_noinit(struct memblock_region *m)
return m->flags & MEMBLOCK_RSRV_NOINIT;
}

+static inline bool memblock_is_preserved(struct memblock_region *m)
+{
+ return m->flags & MEMBLOCK_PRSRV;
+}
+
static inline bool memblock_is_driver_managed(struct memblock_region *m)
{
return m->flags & MEMBLOCK_DRIVER_MANAGED;
diff --git a/mm/memblock.c b/mm/memblock.c
index 0389ce5cd281e..20ab3272cc166 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1048,6 +1048,12 @@ int __init_memblock memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t
MEMBLOCK_RSRV_NOINIT);
}

+int __init_memblock memblock_reserved_mark_preserved(phys_addr_t base, phys_addr_t size)
+{
+ return memblock_setclr_flag(&memblock.reserved, base, size, 1,
+ MEMBLOCK_PRSRV);
+}
+
static bool should_skip_region(struct memblock_type *type,
struct memblock_region *m,
int nid, int flags)
@@ -2181,7 +2187,8 @@ static void __init memmap_init_reserved_pages(void)
* the MEMBLOCK_RSRV_NOINIT flag set
*/
for_each_reserved_mem_region(region) {
- if (!memblock_is_reserved_noinit(region)) {
+ if (!memblock_is_reserved_noinit(region) &&
+ !memblock_is_preserved(region)) {
nid = memblock_get_region_node(region);
start = region->base;
end = start + region->size;
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 4ba5607aaf194..b82c13077928f 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -837,6 +837,22 @@ static void __init init_unavailable_range(unsigned long spfn,
node, zone_names[zone], pgcnt);
}

+static bool pfn_preserved(unsigned long *pfn)
+{
+ struct memblock_region *r;
+
+ for_each_reserved_mem_region(r) {
+ if (memblock_is_preserved(r)) {
+ if (*pfn >= memblock_region_memory_base_pfn(r) &&
+ *pfn < memblock_region_memory_end_pfn(r)) {
+ *pfn = memblock_region_memory_end_pfn(r);
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
/*
* Initially all pages are reserved - free ones are freed
* up by memblock_free_all() once the early boot process is
@@ -889,6 +905,9 @@ void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone
}
}

+ if (pfn_preserved(&pfn))
+ continue;
+
page = pfn_to_page(pfn);
__init_single_page(page, pfn, zone, nid);
if (context == MEMINIT_HOTPLUG) {
--
2.45.2