[PATCH 3/5] mm: release memcg percpu data prematurely

From: Roman Gushchin
Date: Thu Mar 07 2019 - 18:00:47 EST


To reduce the memory footprint of a dying memory cgroup, let's
release massive percpu data (vmstats_percpu) as early as possible,
and use atomic counterparts instead.

A dying cgroup can remain in the dying state for quite a long
time, being pinned in memory by any reference. For example,
if a page mlocked by some other cgroup is charged to the dying
cgroup, the dying cgroup won't go away until the page is released.

A dying memory cgroup can have some memory activity (e.g. dirty
pages can be flushed after cgroup removal), but in general it's
not expected to be very active in comparison to living cgroups.

So reducing the memory footprint by releasing percpu data
and switching over to atomics seems to be a good trade-off.

Signed-off-by: Roman Gushchin <guro@xxxxxx>
---
include/linux/memcontrol.h | 4 ++++
mm/memcontrol.c | 24 +++++++++++++++++++++++-
2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 8ac04632002a..569337514230 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -275,6 +275,10 @@ struct mem_cgroup {

/* memory.stat */
struct memcg_vmstats_percpu __rcu /* __percpu */ *vmstats_percpu;
+ struct memcg_vmstats_percpu __percpu *vmstats_percpu_offlined;
+
+ /* used to release non-used percpu memory */
+ struct rcu_head rcu;

MEMCG_PADDING(_pad2_);

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 8f3cac02221a..8c55954e6f23 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4469,7 +4469,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)

for_each_node(node)
free_mem_cgroup_per_node_info(memcg, node);
- free_percpu(memcg->vmstats_percpu);
+ WARN_ON_ONCE(memcg->vmstats_percpu != NULL);
kfree(memcg);
}

@@ -4612,6 +4612,26 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
return 0;
}

+static void mem_cgroup_free_percpu(struct rcu_head *rcu)
+{
+ struct mem_cgroup *memcg = container_of(rcu, struct mem_cgroup, rcu);
+
+ free_percpu(memcg->vmstats_percpu_offlined);
+ WARN_ON_ONCE(memcg->vmstats_percpu);	/* warns if still non-NULL */
+ /* Pairs with css_get() in mem_cgroup_offline_percpu(). */
+ css_put(&memcg->css);
+}
+
+static void mem_cgroup_offline_percpu(struct mem_cgroup *memcg)
+{
+ /* Updater, not an RCU reader: rcu_dereference() would trip lockdep. */
+ memcg->vmstats_percpu_offlined = (struct memcg_vmstats_percpu __percpu *)
+ rcu_dereference_protected(memcg->vmstats_percpu, true);
+ rcu_assign_pointer(memcg->vmstats_percpu, NULL);
+ css_get(&memcg->css);	/* dropped in mem_cgroup_free_percpu() */
+ call_rcu(&memcg->rcu, mem_cgroup_free_percpu);
+}
+
static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
@@ -4638,6 +4658,8 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
drain_all_stock(memcg);

mem_cgroup_id_put(memcg);
+
+ mem_cgroup_offline_percpu(memcg);
}

static void mem_cgroup_css_released(struct cgroup_subsys_state *css)
--
2.20.1