[RESEND PATCH V3] NUMA:Improve the efficiency of calculating pages loss

From: Liam Ni
Date: Fri Aug 04 2023 - 11:33:10 EST


Optimize the way of calculating missing pages.

In the previous implementation, we calculate missing pages as follows:
1. Calculate numaram by traversing all the numa_meminfo blocks and, for
each of them, traversing all the regions in memblock.memory, to prepare
for counting missing pages.

2. Traverse all the regions in memblock.memory again to get e820ram.

3. The number of missing pages is (e820ram - numaram).

But it is enough to count the memory in 'memblock.memory' that doesn't
have a node assigned.

V2:https://lore.kernel.org/all/20230619075315.49114-1-zhiguangni01@xxxxxxxxx/
V1:https://lore.kernel.org/all/20230615142016.419570-1-zhiguangni01@xxxxxxxxx/

Signed-off-by: Liam Ni <zhiguangni01@xxxxxxxxx>
---
arch/loongarch/kernel/numa.c | 23 ++++++++---------------
arch/x86/mm/numa.c | 26 +++++++-------------------
include/linux/mm.h | 1 +
mm/mm_init.c | 20 ++++++++++++++++++++
4 files changed, 36 insertions(+), 34 deletions(-)

diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c
index 708665895b47..0239891e4d19 100644
--- a/arch/loongarch/kernel/numa.c
+++ b/arch/loongarch/kernel/numa.c
@@ -262,25 +262,18 @@ static void __init node_mem_init(unsigned int node)
* Sanity check to catch more bad NUMA configurations (they are amazingly
* common). Make sure the nodes cover all memory.
*/
-static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
+static bool __init memblock_validate_numa_coverage(const u64 limit)
{
- int i;
- u64 numaram, biosram;
+	u64 lost_bytes;	/* memory with no node assigned, in bytes */

- numaram = 0;
- for (i = 0; i < mi->nr_blks; i++) {
- u64 s = mi->blk[i].start >> PAGE_SHIFT;
- u64 e = mi->blk[i].end >> PAGE_SHIFT;
+	lost_bytes = (u64)calculate_without_node_pages_in_range() << PAGE_SHIFT;

- numaram += e - s;
- numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
- if ((s64)numaram < 0)
- numaram = 0;
+	/* We seem to lose 3 pages somewhere. @limit is the slack, in bytes. */
+	if (lost_bytes >= limit) {
+		pr_err("NUMA: We lost 1m size page.\n");
+		return false;
}
- max_pfn = max_low_pfn;
- biosram = max_pfn - absent_pages_in_range(0, max_pfn);

- BUG_ON((s64)(biosram - numaram) >= (1 << (20 - PAGE_SHIFT)));
return true;
}

@@ -428,7 +421,7 @@ int __init init_numa_memory(void)
return -EINVAL;

init_node_memblock();
- if (numa_meminfo_cover_memory(&numa_meminfo) == false)
+ if (memblock_validate_numa_coverage(SZ_1M) == false)
return -EINVAL;

for_each_node_mask(node, node_possible_map) {
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 2aadb2019b4f..14feec144675 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -451,30 +451,18 @@ EXPORT_SYMBOL(__node_distance);
* Sanity check to catch more bad NUMA configurations (they are amazingly
* common). Make sure the nodes cover all memory.
*/
-static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
+static bool __init memblock_validate_numa_coverage(const u64 limit)
{
- u64 numaram, e820ram;
- int i;
+	u64 lost_bytes;	/* memory with no node assigned, in bytes */

- numaram = 0;
- for (i = 0; i < mi->nr_blks; i++) {
- u64 s = mi->blk[i].start >> PAGE_SHIFT;
- u64 e = mi->blk[i].end >> PAGE_SHIFT;
- numaram += e - s;
- numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
- if ((s64)numaram < 0)
- numaram = 0;
- }
-
- e820ram = max_pfn - absent_pages_in_range(0, max_pfn);
+	lost_bytes = (u64)calculate_without_node_pages_in_range() << PAGE_SHIFT;

/* We seem to lose 3 pages somewhere. Allow 1M of slack. */
- if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) {
- printk(KERN_ERR "NUMA: nodes only cover %LuMB of your
%LuMB e820 RAM. Not used.\n",
- (numaram << PAGE_SHIFT) >> 20,
- (e820ram << PAGE_SHIFT) >> 20);
+	if (lost_bytes >= limit) {	/* @limit is in bytes (caller passes SZ_1M) */
+		pr_err("NUMA: We lost 1m size page.\n");
return false;
}
+
return true;
}

@@ -583,7 +571,7 @@ static int __init numa_register_memblks(struct
numa_meminfo *mi)
return -EINVAL;
}
}
- if (!numa_meminfo_cover_memory(mi))
+ if (!memblock_validate_numa_coverage(SZ_1M))
return -EINVAL;

/* Finally register nodes. */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0daef3f2f029..b32457ad1ae3 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3043,6 +3043,7 @@ unsigned long __absent_pages_in_range(int nid,
unsigned long start_pfn,
unsigned long end_pfn);
extern unsigned long absent_pages_in_range(unsigned long start_pfn,
unsigned long end_pfn);
+extern unsigned long calculate_without_node_pages_in_range(void);
extern void get_pfn_range_for_nid(unsigned int nid,
unsigned long *start_pfn, unsigned long *end_pfn);

diff --git a/mm/mm_init.c b/mm/mm_init.c
index 3ddd18a89b66..13a4883787e3 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1132,6 +1132,26 @@ static void __init
adjust_zone_range_for_zone_movable(int nid,
}
}

+/**
+ * calculate_without_node_pages_in_range - count pages with no node assigned
+ *
+ * Iterates for_each_mem_pfn_range() over all nodes; takes no range arguments.
+ * Return: the number of page frames whose nid is NUMA_NO_NODE.
+ */
+unsigned long __init calculate_without_node_pages_in_range(void)
+{
+	unsigned long num_pages = 0;	/* accumulator: must start at zero */
+	unsigned long start_pfn, end_pfn;
+	int nid, i;
+
+	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
+		if (nid == NUMA_NO_NODE)
+			num_pages += end_pfn - start_pfn;
+	}
+
+	return num_pages;
+}
+
/*
* Return the number of holes in a range on a node. If nid is MAX_NUMNODES,
* then all holes in the requested range will be accounted for.
--
2.25.1