[RFC PATCH v0 2/3] percpu: Limit percpu allocator to online cpus

From: Bharata B Rao
Date: Tue Jun 01 2021 - 02:52:26 EST


Now that the percpu allocator supports growing memory for a CPU
that comes online at hotplug time, limit the allocation, mapping
and memcg charging of percpu memory to online CPUs.

Also change the Percpu memory reporting in /proc/meminfo
to reflect only the populated pages of online CPUs.

TODO: Address percpu memcg charging and uncharging from the
CPU hotplug callbacks.

Signed-off-by: Bharata B Rao <bharata@xxxxxxxxxxxxx>
---
mm/percpu-vm.c | 12 ++++++------
mm/percpu.c | 20 +++++++++++++-------
2 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c
index 3250e1c9aeaf..79ce104c963a 100644
--- a/mm/percpu-vm.c
+++ b/mm/percpu-vm.c
@@ -118,7 +118,7 @@ static void pcpu_free_pages(struct pcpu_chunk *chunk,
unsigned int cpu;
int i;

- for_each_possible_cpu(cpu) {
+ for_each_online_cpu(cpu) {
for (i = page_start; i < page_end; i++) {
struct page *page = pages[pcpu_page_idx(cpu, i)];

@@ -149,7 +149,7 @@ static int pcpu_alloc_pages(struct pcpu_chunk *chunk,

gfp |= __GFP_HIGHMEM;

- for_each_possible_cpu(cpu) {
+ for_each_online_cpu(cpu) {
for (i = page_start; i < page_end; i++) {
struct page **pagep = &pages[pcpu_page_idx(cpu, i)];

@@ -164,7 +164,7 @@ static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
while (--i >= page_start)
__free_page(pages[pcpu_page_idx(cpu, i)]);

- for_each_possible_cpu(tcpu) {
+ for_each_online_cpu(tcpu) {
if (tcpu == cpu)
break;
for (i = page_start; i < page_end; i++)
@@ -248,7 +248,7 @@ static void pcpu_unmap_pages(struct pcpu_chunk *chunk,
unsigned int cpu;
int i;

- for_each_possible_cpu(cpu) {
+ for_each_online_cpu(cpu) {
for (i = page_start; i < page_end; i++) {
struct page *page;

@@ -344,7 +344,7 @@ static int pcpu_map_pages(struct pcpu_chunk *chunk,
unsigned int cpu, tcpu;
int i, err;

- for_each_possible_cpu(cpu) {
+ for_each_online_cpu(cpu) {
err = __pcpu_map_pages(pcpu_chunk_addr(chunk, cpu, page_start),
&pages[pcpu_page_idx(cpu, page_start)],
page_end - page_start);
@@ -357,7 +357,7 @@ static int pcpu_map_pages(struct pcpu_chunk *chunk,
}
return 0;
err:
- for_each_possible_cpu(tcpu) {
+ for_each_online_cpu(tcpu) {
if (tcpu == cpu)
break;
__pcpu_unmap_pages(pcpu_chunk_addr(chunk, tcpu, page_start),
diff --git a/mm/percpu.c b/mm/percpu.c
index ca8ca541bede..83b6bcfcfa80 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1594,7 +1594,7 @@ static enum pcpu_chunk_type pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp,
if (!objcg)
return PCPU_CHUNK_ROOT;

- if (obj_cgroup_charge(objcg, gfp, size * num_possible_cpus())) {
+ if (obj_cgroup_charge(objcg, gfp, size * num_online_cpus())) {
obj_cgroup_put(objcg);
return PCPU_FAIL_ALLOC;
}
@@ -1615,10 +1615,10 @@ static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg,

rcu_read_lock();
mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B,
- size * num_possible_cpus());
+ size * num_online_cpus());
rcu_read_unlock();
} else {
- obj_cgroup_uncharge(objcg, size * num_possible_cpus());
+ obj_cgroup_uncharge(objcg, size * num_online_cpus());
obj_cgroup_put(objcg);
}
}
@@ -1633,11 +1633,11 @@ static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size)
objcg = chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT];
chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = NULL;

- obj_cgroup_uncharge(objcg, size * num_possible_cpus());
+ obj_cgroup_uncharge(objcg, size * num_online_cpus());

rcu_read_lock();
mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B,
- -(size * num_possible_cpus()));
+ -(size * num_online_cpus()));
rcu_read_unlock();

obj_cgroup_put(objcg);
@@ -1680,6 +1680,9 @@ static void pcpu_cpuhp_deregister(struct pcpu_chunk *chunk,
}
}

+/*
+ * TODO: Grow the memcg charge
+ */
static void __pcpu_cpuhp_setup(enum pcpu_chunk_type type, unsigned int cpu)
{
int slot;
@@ -1720,6 +1723,9 @@ static int percpu_cpuhp_setup(unsigned int cpu)
return 0;
}

+/*
+ * TODO: Reduce the memcg charge
+ */
static void __pcpu_cpuhp_destroy(enum pcpu_chunk_type type, unsigned int cpu)
{
int slot;
@@ -2000,7 +2006,7 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
pcpu_schedule_balance_work();

/* clear the areas and return address relative to base address */
- for_each_possible_cpu(cpu)
+ for_each_online_cpu(cpu)
memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size);

ptr = __addr_to_pcpu_ptr(chunk->base_addr + off);
@@ -3372,7 +3378,7 @@ void __init setup_per_cpu_areas(void)
*/
unsigned long pcpu_nr_pages(void)
{
- return pcpu_nr_populated * pcpu_nr_units;
+ return pcpu_nr_populated * num_online_cpus();
}

/*
--
2.31.1