[RFC PATCH 1/3] cgroup: list all subsystem states in debugfs files
From: Yakunin, Dmitry (Nebius)
Date: Mon Sep 11 2023 - 03:55:38 EST
After a cgroup is removed, its subsystem state can leak or linger in the
background forever because it is pinned by some reference. For example, a
memory cgroup can be pinned by pages in the page cache or in tmpfs.
This patch adds a common debugfs interface for listing the basic state of
each controller. A controller can define a callback for dumping its own
attributes.
In the file /sys/kernel/debug/cgroup/<controller>, each line shows one
subsystem state in the format:
<common_attr>=<value>... [-- <controller_attr>=<value>... ]
Common attributes:
css - css pointer
cgroup - cgroup pointer
id - css id
ino - cgroup inode
flags - css flags
refcnt - css atomic refcount; for an online css it shows a huge bias
path - cgroup path
This patch adds memcg attributes:
mem_id - 16-bit memory cgroup id
memory - charged pages
memsw - charged memory+swap for v1 and swap for v2
kmem - charged kernel pages
tcpmem - charged tcp pages
shmem - shmem/tmpfs pages
Link: https://lore.kernel.org/lkml/153414348591.737150.14229960913953276515.stgit@buzz
Suggested-by: Konstantin Khlebnikov <khlebnikov@xxxxxxxxxxxxxx>
Reviewed-by: Andrey Ryabinin <arbn@xxxxxxxxxxxxxxx>
Signed-off-by: Dmitry Yakunin <zeil@xxxxxxxxxx>
---
include/linux/cgroup-defs.h | 1 +
kernel/cgroup/cgroup.c | 101 ++++++++++++++++++++++++++++++++++++
mm/memcontrol.c | 14 +++++
3 files changed, 116 insertions(+)
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 8a0d5466c7be..810bd300cbee 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -673,6 +673,7 @@ struct cgroup_subsys {
void (*exit)(struct task_struct *task);
void (*release)(struct task_struct *task);
void (*bind)(struct cgroup_subsys_state *root_css);
+ void (*css_dump)(struct cgroup_subsys_state *css, struct seq_file *m);
bool early_init:1;
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 625d7483951c..fb9931ff7570 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -40,6 +40,7 @@
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/proc_fs.h>
+#include <linux/debugfs.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
@@ -7068,3 +7069,103 @@ static int __init cgroup_sysfs_init(void)
subsys_initcall(cgroup_sysfs_init);
#endif /* CONFIG_SYSFS */
+
+#ifdef CONFIG_DEBUG_FS
+/*
+ * seq_file ->start: take the RCU read lock and return the first css of the
+ * subsystem whose id is >= *pos, or NULL when there is none.
+ *
+ * The lock is released in css_debugfs_seqfile_stop().
+ */
+static void *css_debugfs_seqfile_start(struct seq_file *m, loff_t *pos)
+{
+	struct cgroup_subsys *ss = m->private;
+	struct cgroup_subsys_state *css;
+	int id = *pos;
+
+	rcu_read_lock();
+	css = idr_get_next(&ss->css_idr, &id);
+	/* Record the id actually found so ->next resumes right after it. */
+	*pos = id;
+	return css;
+}
+
+/*
+ * seq_file ->next: advance to the first css of the subsystem whose id is
+ * greater than *pos, or return NULL when the idr is exhausted.
+ *
+ * Does no locking of its own; css_debugfs_seqfile_start() takes the RCU
+ * read lock.
+ */
+static void *css_debugfs_seqfile_next(struct seq_file *m, void *v,
+				      loff_t *pos)
+{
+	struct cgroup_subsys *ss = m->private;
+	struct cgroup_subsys_state *css;
+	int id = *pos + 1;
+
+	css = idr_get_next(&ss->css_idr, &id);
+	*pos = id;
+	return css;
+}
+
+/* seq_file ->stop: drop the RCU read lock taken in css_debugfs_seqfile_start(). */
+static void css_debugfs_seqfile_stop(struct seq_file *m, void *v)
+{
+	rcu_read_unlock();
+}
+
+/*
+ * seq_file ->show: emit one line describing the css passed in @v.
+ *
+ * The line holds the common attributes, then the cgroup path and, when the
+ * controller provides ->css_dump, " -- " followed by the controller's own
+ * attributes.
+ *
+ * Always returns 0.
+ */
+static int css_debugfs_seqfile_show(struct seq_file *m, void *v)
+{
+	struct cgroup_subsys *ss = m->private;
+	struct cgroup_subsys_state *css = v;
+	/* data is NULL for root cgroup_subsys_state */
+	struct percpu_ref_data *data = css->refcnt.data;
+	size_t buflen;
+	char *buf;
+	int len;
+
+	/*
+	 * Cast the inode number so the argument matches %lu whatever integer
+	 * type cgroup_ino() returns.
+	 */
+	seq_printf(m, "css=%pK cgroup=%pK id=%d ino=%lu flags=%#x refcnt=%lu path=",
+		   css, css->cgroup, css->id,
+		   (unsigned long)cgroup_ino(css->cgroup),
+		   css->flags, data ? atomic_long_read(&data->count) : 0);
+
+	/* Format the path directly into the seq_file's own buffer. */
+	buflen = seq_get_buf(m, &buf);
+	if (buf) {
+		len = cgroup_path(css->cgroup, buf, buflen);
+		/* Commit -1 when the result is negative or does not fit. */
+		seq_commit(m, (len >= 0 && (size_t)len < buflen) ? len : -1);
+	}
+
+	if (ss->css_dump) {
+		seq_puts(m, " -- ");
+		ss->css_dump(css, m);
+	}
+
+	seq_putc(m, '\n');
+	return 0;
+}
+
+/* seq_file iterator callbacks that walk a subsystem's css_idr. */
+static const struct seq_operations css_debug_seq_ops = {
+	.show	= css_debugfs_seqfile_show,
+	.start	= css_debugfs_seqfile_start,
+	.next	= css_debugfs_seqfile_next,
+	.stop	= css_debugfs_seqfile_stop,
+};
+
+/*
+ * Open handler: set up the seq_file iterator and hand it the inode's
+ * private data, which the iterator callbacks read back as the
+ * struct cgroup_subsys to walk.
+ *
+ * Returns 0 on success or the error returned by seq_open().
+ */
+static int css_debugfs_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int err;
+
+	err = seq_open(file, &css_debug_seq_ops);
+	if (err)
+		return err;
+
+	seq = file->private_data;
+	seq->private = inode->i_private;
+	return 0;
+}
+
+/* File operations of the per-subsystem debugfs files; no write handler. */
+static const struct file_operations css_debugfs_fops = {
+	.llseek		= seq_lseek,
+	.read		= seq_read,
+	.open		= css_debugfs_open,
+	.release	= seq_release,
+};
+
+/*
+ * Create the "cgroup" directory at the debugfs root and one read-only file
+ * per cgroup subsystem inside it.
+ *
+ * debugfs_create_dir() reports failure with an ERR_PTR(), never NULL, and
+ * debugfs_create_file() copes with an error-pointer parent, so the result
+ * is deliberately not checked.
+ *
+ * Always returns 0.
+ */
+static int __init css_debugfs_init(void)
+{
+	struct cgroup_subsys *ss;
+	struct dentry *dir;
+	int ssid;
+
+	dir = debugfs_create_dir("cgroup", NULL);
+
+	/* The files have no ->write handler, so do not advertise write access. */
+	for_each_subsys(ss, ssid)
+		debugfs_create_file(ss->name, 0444, dir, ss, &css_debugfs_fops);
+
+	return 0;
+}
+late_initcall(css_debugfs_init);
+#endif /* CONFIG_DEBUG_FS */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4b27e245a055..7b3d4a10ac63 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5654,6 +5654,20 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
}
}
+/*
+ * Append the memcg-specific attributes of @css to @m as space-separated
+ * <name>=<value> pairs, with no trailing newline.
+ *
+ * NOTE(review): nothing in this hunk assigns this function to the memory
+ * controller's ->css_dump - confirm it is wired up elsewhere.
+ */
+static void mem_cgroup_css_dump(struct cgroup_subsys_state *css,
+				struct seq_file *m)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+	unsigned int mem_id = mem_cgroup_id(memcg);
+	unsigned long nr_memory = page_counter_read(&memcg->memory);
+	unsigned long nr_memsw = page_counter_read(&memcg->memsw);
+	unsigned long nr_kmem = page_counter_read(&memcg->kmem);
+	unsigned long nr_tcpmem = page_counter_read(&memcg->tcpmem);
+	unsigned long nr_shmem = memcg_page_state(memcg, NR_SHMEM);
+
+	seq_printf(m, "mem_id=%u memory=%lu memsw=%lu kmem=%lu tcpmem=%lu shmem=%lu",
+		   mem_id, nr_memory, nr_memsw, nr_kmem, nr_tcpmem, nr_shmem);
+}
+
#ifdef CONFIG_MMU
/* Handlers for move charge at task migration. */
static int mem_cgroup_do_precharge(unsigned long count)
--
2.25.1