From 8d04c38137c71d1577a8576fb75db07f3bf92491 Mon Sep 17 00:00:00 2001
From: Yosry Ahmed <yosryahmed@google.com>
Date: Mon, 22 Jan 2024 21:35:29 +0000
Subject: [PATCH] mm: memcg: optimize parent iteration in memcg_rstat_updated()

memcg_rstat_updated() walks the updated memcg and all of its ancestors
to maintain the per-cpu stats_updates counters. Each level of the walk
currently costs a parent_mem_cgroup() call plus this_cpu operations to
locate and update memcg->vmstats_percpu->stats_updates.

Instead, cache a pointer to the parent's percpu struct and a pointer to
memcg->vmstats in each struct memcg_vmstats_percpu at allocation time.
The update path can then walk the hierarchy through the cached percpu
pointers using plain accesses, which is safe because stats updates run
in a non-preemptible context and the memcg hierarchy is pinned for the
duration of the walk.

While at it, rename memcg_should_flush_stats() to
memcg_vmstats_needs_flush(), since it now operates directly on a
struct memcg_vmstats.

Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
---
 mm/memcontrol.c | 45 ++++++++++++++++++++++++++++-----------------
 1 file changed, 28 insertions(+), 17 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c5aa0c2cb68b2..b5ec4a8413215 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -634,6 +634,10 @@ struct memcg_vmstats_percpu {
 
 	/* Stats updates since the last flush */
 	unsigned int		stats_updates;
+
+	/* Cached pointers for fast updates in memcg_rstat_updated() */
+	struct memcg_vmstats_percpu	*parent;
+	struct memcg_vmstats		*vmstats;
 };
 
 struct memcg_vmstats {
@@ -698,36 +702,34 @@ static void memcg_stats_unlock(void)
 }
 
 
-static bool memcg_should_flush_stats(struct mem_cgroup *memcg)
+static bool memcg_vmstats_needs_flush(struct memcg_vmstats *vmstats)
 {
-	return atomic64_read(&memcg->vmstats->stats_updates) >
+	return atomic64_read(&vmstats->stats_updates) >
 		MEMCG_CHARGE_BATCH * num_online_cpus();
 }
 
 static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val)
 {
+	struct memcg_vmstats_percpu *statc;
 	int cpu = smp_processor_id();
-	unsigned int x;
 
 	if (!val)
 		return;
 
 	cgroup_rstat_updated(memcg->css.cgroup, cpu);
-
-	for (; memcg; memcg = parent_mem_cgroup(memcg)) {
-		x = __this_cpu_add_return(memcg->vmstats_percpu->stats_updates,
-					  abs(val));
-
-		if (x < MEMCG_CHARGE_BATCH)
+	statc = this_cpu_ptr(memcg->vmstats_percpu);
+	for (; statc; statc = statc->parent) {
+		statc->stats_updates += abs(val);
+		if (statc->stats_updates < MEMCG_CHARGE_BATCH)
 			continue;
 
 		/*
 		 * If @memcg is already flush-able, increasing stats_updates is
 		 * redundant. Avoid the overhead of the atomic update.
 		 */
-		if (!memcg_should_flush_stats(memcg))
-			atomic64_add(x, &memcg->vmstats->stats_updates);
-		__this_cpu_write(memcg->vmstats_percpu->stats_updates, 0);
+		if (!memcg_vmstats_needs_flush(statc->vmstats))
+			atomic64_add(statc->stats_updates, &statc->vmstats->stats_updates);
+		statc->stats_updates = 0;
 	}
 }
 
@@ -751,7 +753,7 @@ static void do_flush_stats(void)
 
 void mem_cgroup_flush_stats(void)
 {
-	if (memcg_should_flush_stats(root_mem_cgroup))
+	if (memcg_vmstats_needs_flush(root_mem_cgroup->vmstats))
 		do_flush_stats();
 }
 
@@ -765,7 +767,7 @@ void mem_cgroup_flush_stats_ratelimited(void)
 static void flush_memcg_stats_dwork(struct work_struct *w)
 {
 	/*
-	 * Deliberately ignore memcg_should_flush_stats() here so that flushing
+	 * Deliberately ignore memcg_vmstats_needs_flush() here so that flushing
 	 * in latency-sensitive paths is as cheap as possible.
 	 */
 	do_flush_stats();
@@ -5453,10 +5455,11 @@ static void mem_cgroup_free(struct mem_cgroup *memcg)
 	__mem_cgroup_free(memcg);
 }
 
-static struct mem_cgroup *mem_cgroup_alloc(void)
+static struct mem_cgroup *mem_cgroup_alloc(struct mem_cgroup *parent)
 {
+	struct memcg_vmstats_percpu *statc, *pstatc;
 	struct mem_cgroup *memcg;
-	int node;
+	int node, cpu;
 	int __maybe_unused i;
 	long error = -ENOMEM;
 
@@ -5480,6 +5483,14 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
 	if (!memcg->vmstats_percpu)
 		goto fail;
 
+	for_each_possible_cpu(cpu) {
+		if (parent)
+			pstatc = per_cpu_ptr(parent->vmstats_percpu, cpu);
+		statc = per_cpu_ptr(memcg->vmstats_percpu, cpu);
+		statc->parent = parent ? pstatc : NULL;
+		statc->vmstats = memcg->vmstats;
+	}
+
 	for_each_node(node)
 		if (alloc_mem_cgroup_per_node_info(memcg, node))
 			goto fail;
@@ -5525,7 +5536,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 	struct mem_cgroup *memcg, *old_memcg;
 
 	old_memcg = set_active_memcg(parent);
-	memcg = mem_cgroup_alloc();
+	memcg = mem_cgroup_alloc(parent);
 	set_active_memcg(old_memcg);
 	if (IS_ERR(memcg))
 		return ERR_CAST(memcg);
-- 
2.43.0.429.g432eaa2c6b-goog
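
A standalone userspace sketch of the cached parent-pointer walk the
patch introduces, for illustration only. It is not part of the patch:
names such as "node", "node_updated()" and BATCH are invented
stand-ins for struct memcg_vmstats_percpu, memcg_rstat_updated() and
MEMCG_CHARGE_BATCH, and the per-cpu and atomic machinery is
deliberately omitted.

	#include <stdio.h>
	#include <stdlib.h>

	#define BATCH 64	/* stand-in for MEMCG_CHARGE_BATCH */

	struct node {
		unsigned int pending;	/* like stats_updates: batched, cheap */
		unsigned long total;	/* like the shared atomic64 counter */
		struct node *parent;	/* cached once, like statc->parent */
	};

	static struct node *node_alloc(struct node *parent)
	{
		struct node *n = calloc(1, sizeof(*n));

		if (!n)
			abort();
		/* Cache the parent pointer at allocation time. */
		n->parent = parent;
		return n;
	}

	/*
	 * Mirrors the loop in memcg_rstat_updated(): one cached pointer
	 * chase per level, no parent lookup on the hot path.
	 */
	static void node_updated(struct node *n, int val)
	{
		for (; n; n = n->parent) {
			n->pending += abs(val);
			if (n->pending < BATCH)
				continue;
			n->total += n->pending;	/* the "expensive" shared update */
			n->pending = 0;
		}
	}

	int main(void)
	{
		struct node *root = node_alloc(NULL);
		struct node *child = node_alloc(root);

		for (int i = 0; i < 100; i++)
			node_updated(child, 1);
		/* 100 updates: one batch of 64 flushed, 36 still pending. */
		printf("root: total=%lu pending=%u\n", root->total, root->pending);
		return 0;
	}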