[PATCH 4/4] cgroup/bpf: Honor cgroup NS in cgroup_iter for ancestors

From: Michal Koutný
Date: Fri Aug 26 2022 - 12:53:19 EST


The iterator with BPF_CGROUP_ITER_ANCESTORS_UP can traverse up across a
cgroup namespace level, which may be surprising within a non-init cgroup
namespace.

Introduce and use a new cgroup_parent_ns() helper that stops at the
cgroup namespace boundary. With BPF_CGROUP_ITER_ANCESTORS_UP, we use
the cgroup namespace of the iterator caller, not that of the creator
(the two might differ; the caller's namespace is the relevant one).

Fixes: d4ccaf58a847 ("bpf: Introduce cgroup iter")
Signed-off-by: Michal Koutný <mkoutny@xxxxxxxx>
---
include/linux/cgroup.h | 3 +++
kernel/bpf/cgroup_iter.c | 9 ++++++---
kernel/cgroup/cgroup.c | 32 +++++++++++++++++++++++---------
3 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index b6a9528374a8..b63a80e03fae 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -858,6 +858,9 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
struct cgroup_namespace *ns);

+struct cgroup *cgroup_parent_ns(struct cgroup *cgrp,
+ struct cgroup_namespace *ns);
+
#else /* !CONFIG_CGROUPS */

static inline void free_cgroup_ns(struct cgroup_namespace *ns) { }
diff --git a/kernel/bpf/cgroup_iter.c b/kernel/bpf/cgroup_iter.c
index c69bce2f4403..06ee4a0c5870 100644
--- a/kernel/bpf/cgroup_iter.c
+++ b/kernel/bpf/cgroup_iter.c
@@ -104,6 +104,7 @@ static void *cgroup_iter_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct cgroup_subsys_state *curr = (struct cgroup_subsys_state *)v;
struct cgroup_iter_priv *p = seq->private;
+ struct cgroup *parent;

++*pos;
if (p->terminate)
@@ -113,9 +114,11 @@ static void *cgroup_iter_seq_next(struct seq_file *seq, void *v, loff_t *pos)
return css_next_descendant_pre(curr, p->start_css);
else if (p->order == BPF_CGROUP_ITER_DESCENDANTS_POST)
return css_next_descendant_post(curr, p->start_css);
- else if (p->order == BPF_CGROUP_ITER_ANCESTORS_UP)
- return curr->parent;
- else /* BPF_CGROUP_ITER_SELF_ONLY */
+ else if (p->order == BPF_CGROUP_ITER_ANCESTORS_UP) {
+ parent = cgroup_parent_ns(curr->cgroup,
+ current->nsproxy->cgroup_ns);
+ return parent ? &parent->self : NULL;
+ } else /* BPF_CGROUP_ITER_SELF_ONLY */
return NULL;
}

diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index c0377726031f..d60b5dfbbbc9 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -1417,11 +1417,11 @@ static inline struct cgroup *__cset_cgroup_from_root(struct css_set *cset,
}

/*
- * look up cgroup associated with current task's cgroup namespace on the
+ * look up cgroup associated with given cgroup namespace on the
* specified hierarchy
*/
-static struct cgroup *
-current_cgns_cgroup_from_root(struct cgroup_root *root)
+static struct cgroup *cgns_cgroup_from_root(struct cgroup_root *root,
+ struct cgroup_namespace *ns)
{
struct cgroup *res = NULL;
struct css_set *cset;
@@ -1430,7 +1430,7 @@ current_cgns_cgroup_from_root(struct cgroup_root *root)

rcu_read_lock();

- cset = current->nsproxy->cgroup_ns->root_cset;
+ cset = ns->root_cset;
res = __cset_cgroup_from_root(cset, root);

rcu_read_unlock();
@@ -1852,15 +1852,15 @@ int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
int len = 0;
char *buf = NULL;
struct cgroup_root *kf_cgroot = cgroup_root_from_kf(kf_root);
- struct cgroup *ns_cgroup;
+ struct cgroup *root_cgroup;

buf = kmalloc(PATH_MAX, GFP_KERNEL);
if (!buf)
return -ENOMEM;

spin_lock_irq(&css_set_lock);
- ns_cgroup = current_cgns_cgroup_from_root(kf_cgroot);
- len = kernfs_path_from_node(kf_node, ns_cgroup->kn, buf, PATH_MAX);
+ root_cgroup = cgns_cgroup_from_root(kf_cgroot, current->nsproxy->cgroup_ns);
+ len = kernfs_path_from_node(kf_node, root_cgroup->kn, buf, PATH_MAX);
spin_unlock_irq(&css_set_lock);

if (len >= PATH_MAX)
@@ -2330,6 +2330,18 @@ int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
}
EXPORT_SYMBOL_GPL(cgroup_path_ns);

+struct cgroup *cgroup_parent_ns(struct cgroup *cgrp,
+ struct cgroup_namespace *ns)
+{
+ struct cgroup *root_cgrp;
+
+ spin_lock_irq(&css_set_lock);
+ root_cgrp = cgns_cgroup_from_root(cgrp->root, ns);
+ spin_unlock_irq(&css_set_lock);
+
+ return cgrp == root_cgrp ? NULL : cgroup_parent(cgrp);
+}
+
/**
* task_cgroup_path - cgroup path of a task in the first cgroup hierarchy
* @task: target task
@@ -6031,7 +6043,8 @@ struct cgroup *cgroup_get_from_id(u64 id)
goto out;

spin_lock_irq(&css_set_lock);
- root_cgrp = current_cgns_cgroup_from_root(&cgrp_dfl_root);
+ root_cgrp = cgns_cgroup_from_root(&cgrp_dfl_root,
+ current->nsproxy->cgroup_ns);
spin_unlock_irq(&css_set_lock);
if (!cgroup_is_descendant(cgrp, root_cgrp)) {
cgroup_put(cgrp);
@@ -6612,7 +6625,8 @@ struct cgroup *cgroup_get_from_path(const char *path)
struct cgroup *root_cgrp;

spin_lock_irq(&css_set_lock);
- root_cgrp = current_cgns_cgroup_from_root(&cgrp_dfl_root);
+ root_cgrp = cgns_cgroup_from_root(&cgrp_dfl_root,
+ current->nsproxy->cgroup_ns);
kn = kernfs_walk_and_get(root_cgrp->kn, path);
spin_unlock_irq(&css_set_lock);
if (!kn)
--
2.37.0