[PATCH v2 8/9] mm: percpu: skip the per-cpu node walk on single-node systems
From: Alexandre Ghiti
Date: Fri Jun 26 2026 - 06:33:22 EST
pcpu_memcg_{post_alloc,free}_hook() determine each backing page's NUMA
node by walking the chunk with vmalloc_to_page() once per possible CPU
(plus the obj_exts vmalloc pages). On a single-node system that walk is
pure overhead: with only one online node, page_to_nid() can only return
that node, so the whole allocation footprint necessarily lives there.
Add a fast path in pcpu_memcg_accumulate(): when nr_online_nodes == 1,
attribute pcpu_obj_full_size() (payload + obj_exts metadata) to
first_online_node and return, skipping the O(num_possible_cpus)
vmalloc_to_page() walk entirely. The result is identical to walking the
pages, since every page is on that node.
Signed-off-by: Alexandre Ghiti <alex@xxxxxxxx>
---
mm/percpu.c | 18 ++++++++++++++----
1 file changed, 14 insertions(+), 4 deletions(-)
diff --git a/mm/percpu.c b/mm/percpu.c
index 9224344d4b8e..9a735d01b23a 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1689,6 +1689,18 @@ static void pcpu_memcg_accumulate_obj_exts(struct pcpu_chunk *chunk, int off,
}
}
+/*
+ * Add to @node_bytes the bytes that the allocation at @off/@size in @chunk
+ * occupies on each node.
+ *
+ * When only one node is online, the whole footprint reported by
+ * pcpu_obj_full_size() is attributed to first_online_node and the per-page
+ * walk is skipped. Otherwise the percpu pages and the obj_exts pages are
+ * accounted by their respective helpers.
+ *
+ * NOTE(review): nr_online_nodes is sampled separately by the alloc and free
+ * hooks; confirm that a node coming online in between cannot leave the
+ * per-node accounting unbalanced.
+ */
+static void pcpu_memcg_accumulate(struct pcpu_chunk *chunk, int off, size_t size,
+				  unsigned int *node_bytes)
+{
+	if (nr_online_nodes == 1) {
+		/* Accumulate rather than overwrite, matching the slow path. */
+		node_bytes[first_online_node] += pcpu_obj_full_size(size);
+		return;
+	}
+
+	pcpu_memcg_accumulate_pages(chunk, off, size, node_bytes);
+	pcpu_memcg_accumulate_obj_exts(chunk, off, size, node_bytes);
+}
+
static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg,
struct pcpu_chunk *chunk, int off,
size_t size)
@@ -1705,8 +1717,7 @@ static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg,
obj_cgroup_get(objcg);
chunk->obj_exts[off >> PCPU_MIN_ALLOC_SHIFT].cgroup = objcg;
- pcpu_memcg_accumulate_pages(chunk, off, size, node_bytes);
- pcpu_memcg_accumulate_obj_exts(chunk, off, size, node_bytes);
+ pcpu_memcg_accumulate(chunk, off, size, node_bytes);
rcu_read_lock();
mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B,
@@ -1748,8 +1759,7 @@ static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size)
return;
chunk->obj_exts[off >> PCPU_MIN_ALLOC_SHIFT].cgroup = NULL;
- pcpu_memcg_accumulate_pages(chunk, off, size, node_bytes);
- pcpu_memcg_accumulate_obj_exts(chunk, off, size, node_bytes);
+ pcpu_memcg_accumulate(chunk, off, size, node_bytes);
rcu_read_lock();
mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B,
--
2.54.0