[PATCH v4 7/9] mm/dmb: Introduce Designated Movable Blocks

From: Doug Berger
Date: Fri Mar 10 2023 - 19:40:54 EST


Designated Movable Blocks are blocks of memory that are composed
of one or more adjacent memblocks that have the MEMBLOCK_MOVABLE
designation. These blocks must be reserved before receiving that
designation and will be located in the ZONE_MOVABLE zone rather
than any other zone that may span them.

Signed-off-by: Doug Berger <opendmb@xxxxxxxxx>
---
include/linux/dmb.h | 29 ++++++++++++++
mm/Kconfig | 12 ++++++
mm/Makefile | 1 +
mm/dmb.c | 91 +++++++++++++++++++++++++++++++++++++++++++
mm/memblock.c | 6 ++-
mm/page_alloc.c | 95 ++++++++++++++++++++++++++++++++++++++-------
6 files changed, 220 insertions(+), 14 deletions(-)
create mode 100644 include/linux/dmb.h
create mode 100644 mm/dmb.c

diff --git a/include/linux/dmb.h b/include/linux/dmb.h
new file mode 100644
index 000000000000..fa2976c0fa21
--- /dev/null
+++ b/include/linux/dmb.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __DMB_H__
+#define __DMB_H__
+
+#include <linux/memblock.h>
+
+/*
+ * The buddy allocator -- especially pageblock merging and alloc_contig_range()
+ * -- can handle a higher-order page in which only some pageblocks are
+ * MIGRATE_MOVABLE, so pageblock_nr_pages suffices as the minimum alignment.
+ */
+#define DMB_MIN_ALIGNMENT_PAGES pageblock_nr_pages
+#define DMB_MIN_ALIGNMENT_BYTES (PAGE_SIZE * DMB_MIN_ALIGNMENT_PAGES)
+
+enum { /* result codes returned by dmb_intersects() */
+ DMB_DISJOINT = 0, /* range is empty or overlaps no DMB */
+ DMB_INTERSECTS, /* range lies entirely inside a DMB */
+ DMB_MIXED, /* range overlaps a DMB but also extends outside it */
+};
+
+struct dmb; /* opaque here; the layout is private to mm/dmb.c */
+
+extern int dmb_intersects(unsigned long spfn, unsigned long epfn); /* DMB_* code for PFNs [spfn, epfn) */
+
+extern int dmb_reserve(phys_addr_t base, phys_addr_t size,
+ struct dmb **res_dmb); /* 0 or -EINVAL/-ENOSPC; *res_dmb is written only when res_dmb is non-NULL */
+extern void dmb_init_region(struct memblock_region *region); /* sets the region's pages to ZONE_MOVABLE */
+
+#endif
diff --git a/mm/Kconfig b/mm/Kconfig
index 4751031f3f05..85ac5f136487 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -913,6 +913,18 @@ config CMA_AREAS

If unsure, leave the default value "7" in UMA and "19" in NUMA.

+config DMB_COUNT
+ int "Maximum count of Designated Movable Blocks"
+ default 19 if NUMA
+ default 7
+ help
+ Designated Movable Blocks are blocks of memory that can be used
+ by the page allocator exclusively for movable pages. They are
+ managed in ZONE_MOVABLE but may overlap with other zones. This
+ parameter sets the maximum number of DMBs in the system.
+
+ If unsure, leave the default value "7" in UMA and "19" in NUMA.
+
config MEM_SOFT_DIRTY
bool "Track memory changes"
depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS
diff --git a/mm/Makefile b/mm/Makefile
index 8e105e5b3e29..824be8fb11cd 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -67,6 +67,7 @@ obj-y += page-alloc.o
obj-y += init-mm.o
obj-y += memblock.o
obj-y += $(memory-hotplug-y)
+obj-y += dmb.o

ifdef CONFIG_MMU
obj-$(CONFIG_ADVISE_SYSCALLS) += madvise.o
diff --git a/mm/dmb.c b/mm/dmb.c
new file mode 100644
index 000000000000..f6c4e2662e0f
--- /dev/null
+++ b/mm/dmb.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Designated Movable Block
+ */
+
+#define pr_fmt(fmt) "dmb: " fmt
+
+#include <linux/dmb.h>
+
+struct dmb { /* one Designated Movable Block: the PFN range [start_pfn, end_pfn) */
+ unsigned long start_pfn; /* first PFN of the block */
+ unsigned long end_pfn; /* first PFN past the block (exclusive) */
+};
+
+static struct dmb dmb_areas[CONFIG_DMB_COUNT]; /* appended to by dmb_reserve() in call order; never sorted */
+static unsigned int dmb_area_count; /* number of dmb_areas[] slots in use */
+
+/**
+ * dmb_intersects() - classify a PFN range against the Designated Movable Blocks
+ * @spfn: first PFN of the range
+ * @epfn: PFN one past the end of the range (exclusive)
+ *
+ * Every registered block is examined; the first one found to overlap
+ * [@spfn, @epfn) decides the result. dmb_areas[] is filled in the order
+ * dmb_reserve() was called and is never sorted, so a block that lies entirely
+ * above the range must not end the search: a block stored later in the array
+ * may still overlap it.
+ *
+ * Return: DMB_DISJOINT if the range is empty or overlaps no block,
+ * DMB_INTERSECTS if it lies entirely inside the overlapping block, or
+ * DMB_MIXED if it extends beyond that block.
+ */
+int dmb_intersects(unsigned long spfn, unsigned long epfn)
+{
+	unsigned int i;
+
+	if (spfn >= epfn)
+		return DMB_DISJOINT;
+
+	for (i = 0; i < dmb_area_count; i++) {
+		const struct dmb *dmb = &dmb_areas[i];
+
+		/* No overlap with this block: keep looking at the others */
+		if (spfn >= dmb->end_pfn || epfn <= dmb->start_pfn)
+			continue;
+
+		if (spfn >= dmb->start_pfn && epfn <= dmb->end_pfn)
+			return DMB_INTERSECTS;
+
+		return DMB_MIXED;
+	}
+
+	return DMB_DISJOINT;
+}
+EXPORT_SYMBOL(dmb_intersects);
+
+/**
+ * dmb_reserve() - record a reserved range as a Designated Movable Block
+ * @base: physical start address of the range
+ * @size: length of the range in bytes
+ * @res_dmb: if non-NULL, receives a pointer to the new dmb_areas[] entry
+ *
+ * Only the PFN bounds are recorded here, after which the range is handed to
+ * memblock_mark_movable(). No struct page is touched by this function (see
+ * dmb_init_region() for that step).
+ *
+ * Return: 0 on success; -EINVAL if @size is zero, if
+ * memblock_is_region_reserved() returns false for the range, or if @base or
+ * @size is not aligned to DMB_MIN_ALIGNMENT_BYTES; -ENOSPC if dmb_areas[] is
+ * already full.
+ */
+int __init dmb_reserve(phys_addr_t base, phys_addr_t size,
+		       struct dmb **res_dmb)
+{
+	struct dmb *slot;
+
+	/* An empty range can never be a DMB */
+	if (!size)
+		return -EINVAL;
+
+	/* The caller must have reserved the range in memblock beforehand */
+	if (!memblock_is_region_reserved(base, size))
+		return -EINVAL;
+
+	/* ensure minimal alignment required by mm core */
+	if (!IS_ALIGNED(base | size, DMB_MIN_ALIGNMENT_BYTES))
+		return -EINVAL;
+
+	if (dmb_area_count == ARRAY_SIZE(dmb_areas)) {
+		pr_warn("Not enough slots for DMB reserved regions!\n");
+		return -ENOSPC;
+	}
+
+	/* Claim the next free slot and store the half-open PFN range in it */
+	slot = &dmb_areas[dmb_area_count];
+	dmb_area_count++;
+	slot->start_pfn = PFN_DOWN(base);
+	slot->end_pfn = PFN_DOWN(base + size);
+
+	if (res_dmb)
+		*res_dmb = slot;
+
+	memblock_mark_movable(base, size);
+	return 0;
+}
+
+/**
+ * dmb_init_region() - assign a memblock region's pages to ZONE_MOVABLE
+ * @region: memblock region to process
+ *
+ * Walks the region's PFN range in steps of pageblock_nr_pages. For each step
+ * the zone of every page in the pageblock is set to ZONE_MOVABLE, and the
+ * pageblock's first page is then passed to init_reserved_pageblock().
+ *
+ * NOTE(review): the walk presumes the region starts and ends on pageblock
+ * boundaries; dmb_reserve() checks that alignment for the ranges it records,
+ * but nothing is checked here.
+ */
+void __init dmb_init_region(struct memblock_region *region)
+{
+	unsigned long block_pfn = memblock_region_memory_base_pfn(region);
+
+	while (block_pfn < memblock_region_memory_end_pfn(region)) {
+		struct page *head = pfn_to_page(block_pfn);
+		int n;
+
+		for (n = 0; n < pageblock_nr_pages; n++)
+			set_page_zone(head + n, ZONE_MOVABLE);
+
+		/* free reserved pageblocks to page allocator */
+		init_reserved_pageblock(head);
+
+		block_pfn += pageblock_nr_pages;
+	}
+}
diff --git a/mm/memblock.c b/mm/memblock.c
index 794a099ec3e2..3db06288a5c0 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -16,6 +16,7 @@
#include <linux/kmemleak.h>
#include <linux/seq_file.h>
#include <linux/memblock.h>
+#include <linux/dmb.h>

#include <asm/sections.h>
#include <linux/io.h>
@@ -2103,13 +2104,16 @@ static void __init memmap_init_reserved_pages(void)
for_each_reserved_mem_range(i, &start, &end)
reserve_bootmem_region(start, end);

- /* and also treat struct pages for the NOMAP regions as PageReserved */
for_each_mem_region(region) {
+ /* treat struct pages for the NOMAP regions as PageReserved */
if (memblock_is_nomap(region)) {
start = region->base;
end = start + region->size;
reserve_bootmem_region(start, end);
}
+ /* move Designated Movable Block pages to ZONE_MOVABLE */
+ if (memblock_is_movable(region))
+ dmb_init_region(region);
}
}

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index da1af678995b..26846a9a9fc4 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -76,6 +76,7 @@
#include <linux/khugepaged.h>
#include <linux/buffer_head.h>
#include <linux/delayacct.h>
+#include <linux/dmb.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -414,6 +415,8 @@ static unsigned long required_kernelcore __initdata;
static unsigned long required_kernelcore_percent __initdata;
static unsigned long required_movablecore __initdata;
static unsigned long required_movablecore_percent __initdata;
+static unsigned long min_dmb_pfn[MAX_NUMNODES] __initdata;
+static unsigned long max_dmb_pfn[MAX_NUMNODES] __initdata;
static unsigned long zone_movable_pfn[MAX_NUMNODES] __initdata;
bool mirrored_kernelcore __initdata_memblock;

@@ -2171,7 +2174,7 @@ static int __init deferred_init_memmap(void *data)
}
zone_empty:
/* Sanity check that the next zone really is unpopulated */
- WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone));
+ WARN_ON(++zid < ZONE_MOVABLE && populated_zone(++zone));

pr_info("node %d deferred pages initialised in %ums\n",
pgdat->node_id, jiffies_to_msecs(jiffies - start));
@@ -7022,6 +7025,10 @@ static void __init memmap_init_zone_range(struct zone *zone,
unsigned long zone_end_pfn = zone_start_pfn + zone->spanned_pages;
int nid = zone_to_nid(zone), zone_id = zone_idx(zone);

+ /* Skip overlap of ZONE_MOVABLE */
+ if (zone_id == ZONE_MOVABLE && zone_start_pfn < *hole_pfn)
+ zone_start_pfn = *hole_pfn;
+
start_pfn = clamp(start_pfn, zone_start_pfn, zone_end_pfn);
end_pfn = clamp(end_pfn, zone_start_pfn, zone_end_pfn);

@@ -7482,6 +7489,12 @@ static unsigned long __init zone_spanned_pages_in_node(int nid,
node_start_pfn, node_end_pfn,
zone_start_pfn, zone_end_pfn);

+ if (zone_type == ZONE_MOVABLE && max_dmb_pfn[nid]) {
+ if (*zone_start_pfn == *zone_end_pfn)
+ *zone_end_pfn = max_dmb_pfn[nid];
+ *zone_start_pfn = min(*zone_start_pfn, min_dmb_pfn[nid]);
+ }
+
/* Check that this node has pages within the zone's required range */
if (*zone_end_pfn < node_start_pfn || *zone_start_pfn > node_end_pfn)
return 0;
@@ -7550,12 +7563,21 @@ static unsigned long __init zone_absent_pages_in_node(int nid,
&zone_start_pfn, &zone_end_pfn);
nr_absent = __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn);

+ if (zone_type == ZONE_MOVABLE && max_dmb_pfn[nid]) {
+ if (zone_start_pfn == zone_end_pfn)
+ zone_end_pfn = max_dmb_pfn[nid];
+ else
+ zone_end_pfn = zone_movable_pfn[nid];
+ zone_start_pfn = min(zone_start_pfn, min_dmb_pfn[nid]);
+ nr_absent += zone_end_pfn - zone_start_pfn;
+ }
+
/*
* ZONE_MOVABLE handling.
- * Treat pages to be ZONE_MOVABLE in ZONE_NORMAL as absent pages
+ * Treat pages to be ZONE_MOVABLE in other zones as absent pages
* and vice versa.
*/
- if (mirrored_kernelcore && zone_movable_pfn[nid]) {
+ if (zone_movable_pfn[nid]) {
unsigned long start_pfn, end_pfn;
struct memblock_region *r;

@@ -7565,6 +7587,19 @@ static unsigned long __init zone_absent_pages_in_node(int nid,
end_pfn = clamp(memblock_region_memory_end_pfn(r),
zone_start_pfn, zone_end_pfn);

+ if (memblock_is_movable(r)) {
+ if (zone_type != ZONE_MOVABLE) {
+ nr_absent += end_pfn - start_pfn;
+ continue;
+ }
+
+ nr_absent -= end_pfn - start_pfn;
+ continue;
+ }
+
+ if (!mirrored_kernelcore)
+ continue;
+
if (zone_type == ZONE_MOVABLE &&
memblock_is_mirror(r))
nr_absent += end_pfn - start_pfn;
@@ -7584,18 +7619,27 @@ static void __init calculate_node_totalpages(struct pglist_data *pgdat,
{
unsigned long totalpages = 0;
enum zone_type i;
+ int nid = pgdat->node_id;
+
+ /*
+ * If Designated Movable Blocks are defined on this node, ensure that
+ * zone_movable_pfn is also defined for this node.
+ */
+ if (max_dmb_pfn[nid] && !zone_movable_pfn[nid])
+ zone_movable_pfn[nid] = min(node_end_pfn,
+ arch_zone_highest_possible_pfn[movable_zone]);

for (i = 0; i < MAX_NR_ZONES; i++) {
struct zone *zone = pgdat->node_zones + i;
unsigned long zone_start_pfn, zone_end_pfn;
unsigned long spanned, absent, size;

- spanned = zone_spanned_pages_in_node(pgdat->node_id, i,
+ spanned = zone_spanned_pages_in_node(nid, i,
node_start_pfn,
node_end_pfn,
&zone_start_pfn,
&zone_end_pfn);
- absent = zone_absent_pages_in_node(pgdat->node_id, i,
+ absent = zone_absent_pages_in_node(nid, i,
node_start_pfn,
node_end_pfn);

@@ -8047,15 +8091,27 @@ unsigned long __init node_map_pfn_alignment(void)
static unsigned long __init early_calculate_totalpages(void)
{
unsigned long totalpages = 0;
- unsigned long start_pfn, end_pfn;
- int i, nid;
+ struct memblock_region *r;

- for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
- unsigned long pages = end_pfn - start_pfn;
+ for_each_mem_region(r) {
+ unsigned long start_pfn, end_pfn, pages;
+ int nid;
+
+ nid = memblock_get_region_node(r);
+ start_pfn = memblock_region_memory_base_pfn(r);
+ end_pfn = memblock_region_memory_end_pfn(r);

- totalpages += pages;
- if (pages)
+ pages = end_pfn - start_pfn;
+ if (pages) {
+ totalpages += pages;
node_set_state(nid, N_MEMORY);
+ if (memblock_is_movable(r)) {
+ if (start_pfn < min_dmb_pfn[nid])
+ min_dmb_pfn[nid] = start_pfn;
+ if (end_pfn > max_dmb_pfn[nid])
+ max_dmb_pfn[nid] = end_pfn;
+ }
+ }
}
return totalpages;
}
@@ -8068,7 +8124,7 @@ static unsigned long __init early_calculate_totalpages(void)
*/
static void __init find_zone_movable_pfns_for_nodes(void)
{
- int i, nid;
+ int nid;
unsigned long usable_startpfn;
unsigned long kernelcore_node, kernelcore_remaining;
/* save the state before borrow the nodemask */
@@ -8196,13 +8252,24 @@ static void __init find_zone_movable_pfns_for_nodes(void)
kernelcore_remaining = kernelcore_node;

/* Go through each range of PFNs within this node */
- for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
+ for_each_mem_region(r) {
unsigned long size_pages;

+ if (memblock_get_region_node(r) != nid)
+ continue;
+
+ start_pfn = memblock_region_memory_base_pfn(r);
+ end_pfn = memblock_region_memory_end_pfn(r);
start_pfn = max(start_pfn, zone_movable_pfn[nid]);
if (start_pfn >= end_pfn)
continue;

+ /* Skip over Designated Movable Blocks */
+ if (memblock_is_movable(r)) {
+ zone_movable_pfn[nid] = end_pfn;
+ continue;
+ }
+
/* Account for what is only usable for kernelcore */
if (start_pfn < usable_startpfn) {
unsigned long kernel_pages;
@@ -8351,6 +8418,8 @@ void __init free_area_init(unsigned long *max_zone_pfn)
}

/* Find the PFNs that ZONE_MOVABLE begins at in each node */
+ memset(min_dmb_pfn, 0xff, sizeof(min_dmb_pfn));
+ memset(max_dmb_pfn, 0, sizeof(max_dmb_pfn));
memset(zone_movable_pfn, 0, sizeof(zone_movable_pfn));
find_zone_movable_pfns_for_nodes();

--
2.34.1