[RFC PATCH] memcg: adjust memcg for new cgroup allocations
From: Vasily Averin
Date: Wed Aug 17 2022 - 03:42:54 EST
This is first of patches announced here:
https://lore.kernel.org/all/62188f37-f816-08e9-cdd5-8df23131746d@xxxxxxxxxx/
---
Usually accounted allocations use memory cgroup of current process.
In case of cgroup this looks incorrect: memory cgroup can be created
both from the host and from inside container. At the same time
it is intuitively expected that these both operations should lead
to the same result.
This issue was addressed by Roman Gushchin in commit 3e38e0aaca9e
"mm: memcg: charge memcg percpu memory to the parent cgroup",
He adjusted memcg for allocations called inside mem_cgroup_css_alloc.
However, now we want to enable accounting for some other cgroup-related
resources called from cgroup_mkdir. We would like to guarantee that
all new accounted allocation will be charged to the same memory cgroup.
This patch moves memcg adjustment from mem_cgroup_css_alloc() to its
callers: cgroup_mkdir() and cgroup_apply_control(). In own turn this
requires to create new proxy function mem_cgroup_from_cgroup().
Signed-off-by: Vasily Averin <vvs@xxxxxxxxxx>
---
include/linux/memcontrol.h | 18 ++++++++++++++++++
kernel/cgroup/cgroup.c | 22 ++++++++++++++++------
mm/memcontrol.c | 4 +---
3 files changed, 35 insertions(+), 9 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 4d31ce55b1c0..342426d1edbf 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1813,6 +1813,19 @@ static inline struct mem_cgroup *mem_cgroup_or_root(struct mem_cgroup *memcg)
{
return memcg ? memcg : root_mem_cgroup;
}
+
+static inline struct mem_cgroup *mem_cgroup_from_cgroup(struct cgroup *cgroup)
+{
+ struct cgroup_subsys_state *css;
+ struct mem_cgroup *memcg = NULL;
+
+ css = cgroup_get_e_css(cgroup, &memory_cgrp_subsys);
+
+ if (css)
+ memcg = container_of(css, struct mem_cgroup, css);
+
+ return mem_cgroup_or_root(memcg);
+}
#else
static inline bool mem_cgroup_kmem_disabled(void)
{
@@ -1878,6 +1891,11 @@ static inline struct mem_cgroup *mem_cgroup_or_root(struct mem_cgroup *memcg)
{
return NULL;
}
+
+static inline struct mem_cgroup *mem_cgroup_from_cgroup(struct cgroup *cgroup)
+{
+ return NULL;
+}
#endif /* CONFIG_MEMCG_KMEM */
#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP)
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index ffaccd6373f1..544d93a8878f 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -3247,13 +3247,16 @@ static void cgroup_apply_control_disable(struct cgroup *cgrp)
*/
static int cgroup_apply_control(struct cgroup *cgrp)
{
+ struct mem_cgroup *memcg, *old_memcg;
int ret;
cgroup_propagate_control(cgrp);
+ memcg = mem_cgroup_from_cgroup(cgrp);
+ old_memcg = set_active_memcg(memcg);
ret = cgroup_apply_control_enable(cgrp);
if (ret)
- return ret;
+ goto out_memcg;
/*
* At this point, cgroup_e_css_by_mask() results reflect the new csses
@@ -3261,10 +3264,11 @@ static int cgroup_apply_control(struct cgroup *cgrp)
* css associations of all tasks in the subtree.
*/
ret = cgroup_update_dfl_csses(cgrp);
- if (ret)
- return ret;
- return 0;
+out_memcg:
+ set_active_memcg(old_memcg);
+ mem_cgroup_put(memcg);
+ return ret;
}
/**
@@ -5532,6 +5536,7 @@ static bool cgroup_check_hierarchy_limits(struct cgroup *parent)
int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode)
{
struct cgroup *parent, *cgrp;
+ struct mem_cgroup *memcg, *old_memcg;
int ret;
/* do not accept '\n' to prevent making /proc/<pid>/cgroup unparsable */
@@ -5547,10 +5552,12 @@ int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode)
goto out_unlock;
}
+ memcg = mem_cgroup_from_cgroup(parent);
+ old_memcg = set_active_memcg(memcg);
cgrp = cgroup_create(parent, name, mode);
if (IS_ERR(cgrp)) {
ret = PTR_ERR(cgrp);
- goto out_unlock;
+ goto out_memcg;
}
/*
@@ -5577,10 +5584,13 @@ int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode)
kernfs_activate(cgrp->kn);
ret = 0;
- goto out_unlock;
+ goto out_memcg;
out_destroy:
cgroup_destroy_locked(cgrp);
+out_memcg:
+ set_active_memcg(old_memcg);
+ mem_cgroup_put(memcg);
out_unlock:
cgroup_kn_unlock(parent_kn);
return ret;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b69979c9ced5..e170c64e66e2 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5239,11 +5239,9 @@ static struct cgroup_subsys_state * __ref
mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
{
struct mem_cgroup *parent = mem_cgroup_from_css(parent_css);
- struct mem_cgroup *memcg, *old_memcg;
+ struct mem_cgroup *memcg;
- old_memcg = set_active_memcg(parent);
memcg = mem_cgroup_alloc();
- set_active_memcg(old_memcg);
if (IS_ERR(memcg))
return ERR_CAST(memcg);
--
2.34.1