[PATCH 2/2] mm/memcontrol: split local and nested atomic vmstats/vmevents counters

From: Konstantin Khlebnikov
Date: Wed Jul 17 2019 - 08:29:26 EST


This is an alternative solution to the problem addressed in commit 815744d75152
("mm: memcontrol: don't batch updates of local VM stats and events").

Instead of adding a second set of percpu counters, which wastes memory and
slows down reading statistics in cgroup-v1, this patch uses two arrays of
atomic counters: local and nested statistics.

An update then performs the same number of atomic operations as before: one
local update plus one nested update for each parent cgroup. Readers of
hierarchical statistics have to sum two atomics, which isn't a big deal.
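
To illustrate the cost model, here is a minimal userspace sketch of the
two-array scheme (hypothetical names, C11 atomics, batching left out for now):
an update touches the cgroup's own local counter plus one nested counter per
ancestor, and a hierarchical read sums exactly two atomics.

#include <stdatomic.h>

struct cg {
	struct cg *parent;
	atomic_long local;	/* this cgroup's own contribution */
	atomic_long nested;	/* contributions from all descendants */
};

static void cg_add(struct cg *cg, long val)
{
	/* One atomic for the cgroup itself... */
	atomic_fetch_add(&cg->local, val);
	/* ...and one "nested" atomic per ancestor. */
	for (cg = cg->parent; cg; cg = cg->parent)
		atomic_fetch_add(&cg->nested, val);
}

static long cg_read_local(struct cg *cg)
{
	return atomic_load(&cg->local);
}

static long cg_read_hierarchical(struct cg *cg)
{
	/* Hierarchical value = own contribution + subtree contribution. */
	return atomic_load(&cg->local) + atomic_load(&cg->nested);
}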

All updates are still batched using one set of percpu counters.
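
Building on the sketch above, a hedged illustration of the batching: per-cpu
deltas accumulate locally and are only flushed into the atomic arrays once
they exceed a threshold. The names here are stand-ins; the real code below
uses the memcg percpu counters and MEMCG_CHARGE_BATCH.

#define BATCH 64	/* stand-in for MEMCG_CHARGE_BATCH */

/* pcpu_pending stands in for this cpu's percpu slot for one counter. */
static void cg_add_batched(struct cg *cg, long *pcpu_pending, long val)
{
	long x = *pcpu_pending + val;

	if (x > BATCH || x < -BATCH) {
		/* Flush: one local add + one nested add per ancestor. */
		cg_add(cg, x);
		x = 0;
	}
	*pcpu_pending = x;
}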

Signed-off-by: Konstantin Khlebnikov <khlebnikov@xxxxxxxxxxxxxx>
---
include/linux/memcontrol.h | 19 +++++++----------
mm/memcontrol.c | 48 +++++++++++++++++++-------------------------
2 files changed, 29 insertions(+), 38 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 44c41462be33..4dd75d50c200 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -269,16 +269,16 @@ struct mem_cgroup {
atomic_t moving_account;
struct task_struct *move_lock_task;

- /* Legacy local VM stats and events */
- struct memcg_vmstats_percpu __percpu *vmstats_local;
-
/* Subtree VM stats and events (batched updates) */
struct memcg_vmstats_percpu __percpu *vmstats_percpu;

MEMCG_PADDING(_pad2_);

- atomic_long_t vmstats[MEMCG_NR_STAT];
- atomic_long_t vmevents[NR_VM_EVENT_ITEMS];
+ atomic_long_t vmstats_local[MEMCG_NR_STAT];
+ atomic_long_t vmstats_nested[MEMCG_NR_STAT];
+
+ atomic_long_t vmevents_local[NR_VM_EVENT_ITEMS];
+ atomic_long_t vmevents_nested[NR_VM_EVENT_ITEMS];

/* memory.events */
atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS];
@@ -557,7 +557,8 @@ void unlock_page_memcg(struct page *page);
*/
static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
{
- long x = atomic_long_read(&memcg->vmstats[idx]);
+ long x = atomic_long_read(&memcg->vmstats_local[idx]) +
+ atomic_long_read(&memcg->vmstats_nested[idx]);
#ifdef CONFIG_SMP
if (x < 0)
x = 0;
@@ -572,11 +573,7 @@ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
static inline unsigned long memcg_page_state_local(struct mem_cgroup *memcg,
int idx)
{
- long x = 0;
- int cpu;
-
- for_each_possible_cpu(cpu)
- x += per_cpu(memcg->vmstats_local->stat[idx], cpu);
+ long x = atomic_long_read(&memcg->vmstats_local[idx]);
#ifdef CONFIG_SMP
if (x < 0)
x = 0;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 06d33dfc4ec4..97debc8e4120 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -695,14 +695,13 @@ void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val)
if (mem_cgroup_disabled())
return;

- __this_cpu_add(memcg->vmstats_local->stat[idx], val);
-
x = val + __this_cpu_read(memcg->vmstats_percpu->stat[idx]);
if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
struct mem_cgroup *mi;

- for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
- atomic_long_add(x, &mi->vmstats[idx]);
+ atomic_long_add(x, &memcg->vmstats_local[idx]);
+ for (mi = memcg; (mi = parent_mem_cgroup(mi)); )
+ atomic_long_add(x, &mi->vmstats_nested[idx]);
x = 0;
}
__this_cpu_write(memcg->vmstats_percpu->stat[idx], x);
@@ -777,14 +776,13 @@ void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
if (mem_cgroup_disabled())
return;

- __this_cpu_add(memcg->vmstats_local->events[idx], count);
-
x = count + __this_cpu_read(memcg->vmstats_percpu->events[idx]);
if (unlikely(x > MEMCG_CHARGE_BATCH)) {
struct mem_cgroup *mi;

- for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
- atomic_long_add(x, &mi->vmevents[idx]);
+ atomic_long_add(x, &memcg->vmevents_local[idx]);
+ for (mi = memcg; (mi = parent_mem_cgroup(mi)); )
+ atomic_long_add(x, &mi->vmevents_nested[idx]);
x = 0;
}
__this_cpu_write(memcg->vmstats_percpu->events[idx], x);
@@ -792,17 +790,13 @@ void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,

static unsigned long memcg_events(struct mem_cgroup *memcg, int event)
{
- return atomic_long_read(&memcg->vmevents[event]);
+ return atomic_long_read(&memcg->vmevents_local[event]) +
+ atomic_long_read(&memcg->vmevents_nested[event]);
}

static unsigned long memcg_events_local(struct mem_cgroup *memcg, int event)
{
- long x = 0;
- int cpu;
-
- for_each_possible_cpu(cpu)
- x += per_cpu(memcg->vmstats_local->events[event], cpu);
- return x;
+ return atomic_long_read(&memcg->vmevents_local[event]);
}

static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
@@ -2257,9 +2251,11 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu)
long x;

x = this_cpu_xchg(memcg->vmstats_percpu->stat[i], 0);
- if (x)
- for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
- atomic_long_add(x, &mi->vmstats[i]);
+ if (x) {
+ atomic_long_add(x, &memcg->vmstats_local[i]);
+ for (mi = memcg; (mi = parent_mem_cgroup(mi)); )
+ atomic_long_add(x, &mi->vmstats_nested[i]);
+ }

if (i >= NR_VM_NODE_STAT_ITEMS)
continue;
@@ -2280,9 +2276,11 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu)
long x;

x = this_cpu_xchg(memcg->vmstats_percpu->events[i], 0);
- if (x)
- for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
- atomic_long_add(x, &mi->vmevents[i]);
+ if (x) {
+ atomic_long_add(x, &memcg->vmevents_local[i]);
+ for (mi = memcg; (mi = parent_mem_cgroup(mi)); )
+ atomic_long_add(x, &mi->vmevents_nested[i]);
+ }
}
}

@@ -4085,7 +4083,8 @@ struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb)
*/
static unsigned long memcg_exact_page_state(struct mem_cgroup *memcg, int idx)
{
- long x = atomic_long_read(&memcg->vmstats[idx]);
+ long x = atomic_long_read(&memcg->vmstats_local[idx]) +
+ atomic_long_read(&memcg->vmstats_nested[idx]);
int cpu;

for_each_online_cpu(cpu)
@@ -4638,7 +4637,6 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
for_each_node(node)
free_mem_cgroup_per_node_info(memcg, node);
free_percpu(memcg->vmstats_percpu);
- free_percpu(memcg->vmstats_local);
kfree(memcg);
}

@@ -4667,10 +4665,6 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
if (memcg->id.id < 0)
goto fail;

- memcg->vmstats_local = alloc_percpu(struct memcg_vmstats_percpu);
- if (!memcg->vmstats_local)
- goto fail;
-
memcg->vmstats_percpu = alloc_percpu(struct memcg_vmstats_percpu);
if (!memcg->vmstats_percpu)
goto fail;