Re: [PATCH] memcg: add hierarchical effective limits for v2

From: Shakeel Butt
Date: Thu Feb 06 2025 - 17:25:27 EST


Oops, I forgot to CC Andrew.

On Wed, Feb 05, 2025 at 02:20:29PM -0800, Shakeel Butt wrote:
> Memcg-v1 exposes hierarchical_[memory|memsw]_limit counters in its
> memory.stat file, which applications can use to get their effective
> limit, i.e. the minimum of the cgroup's own limit and the limits of all
> of its ancestors. This is pretty useful in environments where a cgroup
> namespace is used and the application does not have access to the full
> view of the cgroup hierarchy. Let's expose effective limits for memcg v2
> as well.
>
> Signed-off-by: Shakeel Butt <shakeel.butt@xxxxxxxxx>
> ---
> Documentation/admin-guide/cgroup-v2.rst | 24 +++++++++++++
> mm/memcontrol.c | 48 +++++++++++++++++++++++++
> 2 files changed, 72 insertions(+)
>
> diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
> index cb1b4e759b7e..175e9435ad5c 100644
> --- a/Documentation/admin-guide/cgroup-v2.rst
> +++ b/Documentation/admin-guide/cgroup-v2.rst
> @@ -1311,6 +1311,14 @@ PAGE_SIZE multiple when read back.
> Caller could retry them differently, return into userspace
> as -ENOMEM or silently ignore in cases like disk readahead.
>
> + memory.max.effective
> + A read-only single value file which exists on non-root cgroups.
> +
> + The effective limit of the cgroup, i.e. the minimum memory.max
> + of the cgroup itself and all of its ancestors. This is useful in
> + environments where a cgroup namespace is used and the application
> + does not have a full view of the hierarchy.
> +
> memory.reclaim
> A write-only nested-keyed file which exists for all cgroups.
>
> @@ -1726,6 +1734,14 @@ The following nested keys are defined.
> Swap usage hard limit. If a cgroup's swap usage reaches this
> limit, anonymous memory of the cgroup will not be swapped out.
>
> + memory.swap.max.effective
> + A read-only single value file which exists on non-root cgroups.
> +
> + The effective limit of the cgroup, i.e. the minimum memory.swap.max
> + of the cgroup itself and all of its ancestors. This is useful in
> + environments where a cgroup namespace is used and the application
> + does not have a full view of the hierarchy.
> +
> memory.swap.events
> A read-only flat-keyed file which exists on non-root cgroups.
> The following entries are defined. Unless specified
> @@ -1766,6 +1782,14 @@ The following nested keys are defined.
> limit, it will refuse to take any more stores before existing
> entries fault back in or are written out to disk.
>
> + memory.zswap.max.effective
> + A read-only single value file which exists on non-root cgroups.
> +
> + The effective limit of the cgroup, i.e. the minimum memory.zswap.max
> + of the cgroup itself and all of its ancestors. This is useful in
> + environments where a cgroup namespace is used and the application
> + does not have a full view of the hierarchy.
> +
> memory.zswap.writeback
> A read-write single value file. The default value is "1".
> Note that this setting is hierarchical, i.e. the writeback would be
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index cae1c2e0cc71..8d21c1a44220 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -4161,6 +4161,17 @@ static int memory_max_show(struct seq_file *m, void *v)
> READ_ONCE(mem_cgroup_from_seq(m)->memory.max));
> }
>
> +static int memory_max_effective_show(struct seq_file *m, void *v)
> +{
> + unsigned long max = PAGE_COUNTER_MAX;
> + struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
> +
> + for (; memcg; memcg = parent_mem_cgroup(memcg))
> + max = min(max, READ_ONCE(memcg->memory.max));
> +
> + return seq_puts_memcg_tunable(m, max);
> +}
> +
> static ssize_t memory_max_write(struct kernfs_open_file *of,
> char *buf, size_t nbytes, loff_t off)
> {
> @@ -4438,6 +4449,11 @@ static struct cftype memory_files[] = {
> .seq_show = memory_max_show,
> .write = memory_max_write,
> },
> + {
> + .name = "max.effective",
> + .flags = CFTYPE_NOT_ON_ROOT,
> + .seq_show = memory_max_effective_show,
> + },
> {
> .name = "events",
> .flags = CFTYPE_NOT_ON_ROOT,
> @@ -5117,6 +5133,17 @@ static int swap_max_show(struct seq_file *m, void *v)
> READ_ONCE(mem_cgroup_from_seq(m)->swap.max));
> }
>
> +static int swap_max_effective_show(struct seq_file *m, void *v)
> +{
> + unsigned long max = PAGE_COUNTER_MAX;
> + struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
> +
> + for (; memcg; memcg = parent_mem_cgroup(memcg))
> + max = min(max, READ_ONCE(memcg->swap.max));
> +
> + return seq_puts_memcg_tunable(m, max);
> +}
> +
> static ssize_t swap_max_write(struct kernfs_open_file *of,
> char *buf, size_t nbytes, loff_t off)
> {
> @@ -5166,6 +5193,11 @@ static struct cftype swap_files[] = {
> .seq_show = swap_max_show,
> .write = swap_max_write,
> },
> + {
> + .name = "swap.max.effective",
> + .flags = CFTYPE_NOT_ON_ROOT,
> + .seq_show = swap_max_effective_show,
> + },
> {
> .name = "swap.peak",
> .flags = CFTYPE_NOT_ON_ROOT,
> @@ -5308,6 +5340,17 @@ static int zswap_max_show(struct seq_file *m, void *v)
> READ_ONCE(mem_cgroup_from_seq(m)->zswap_max));
> }
>
> +static int zswap_max_effective_show(struct seq_file *m, void *v)
> +{
> + unsigned long max = PAGE_COUNTER_MAX;
> + struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
> +
> + for (; memcg; memcg = parent_mem_cgroup(memcg))
> + max = min(max, READ_ONCE(memcg->zswap_max));
> +
> + return seq_puts_memcg_tunable(m, max);
> +}
> +
> static ssize_t zswap_max_write(struct kernfs_open_file *of,
> char *buf, size_t nbytes, loff_t off)
> {
> @@ -5362,6 +5405,11 @@ static struct cftype zswap_files[] = {
> .seq_show = zswap_max_show,
> .write = zswap_max_write,
> },
> + {
> + .name = "zswap.max.effective",
> + .flags = CFTYPE_NOT_ON_ROOT,
> + .seq_show = zswap_max_effective_show,
> + },
> {
> .name = "zswap.writeback",
> .seq_show = zswap_writeback_show,
> --
> 2.43.5
>