[PATCH] cgroup: fix race between fork and cgroup.kill

From: Shakeel Butt
Date: Wed Jan 29 2025 - 13:53:21 EST


Signed-off-by: Shakeel Butt <shakeel.butt@xxxxxxxxx>
---
include/linux/cgroup-defs.h | 6 +++---
include/linux/sched/task.h | 1 +
kernel/cgroup/cgroup.c | 17 +++++++++--------
3 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 1b20d2d8ef7c..0d8c12c0efdb 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -71,9 +71,6 @@ enum {

/* Cgroup is frozen. */
CGRP_FROZEN,
-
- /* Control group has to be killed. */
- CGRP_KILL,
};

/* cgroup_root->flags */
@@ -520,6 +517,9 @@ struct cgroup {
struct cgroup_rstat_cpu __percpu *rstat_cpu;
struct list_head rstat_css_list;

+ /* sequence number for cgroup.kill, serialized by css_set_lock. */
+ unsigned long kill_seq;
+
/*
* Add padding to separate the read mostly rstat_cpu and
* rstat_css_list into a different cacheline from the following
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 0f2aeb37bbb0..ce56ae0a9cbb 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -43,6 +43,7 @@ struct kernel_clone_args {
void *fn_arg;
struct cgroup *cgrp;
struct css_set *cset;
+ unsigned long kill_seq;
};

/*
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 805764cf14e2..5aec3b7bc084 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -4013,7 +4013,7 @@ static void __cgroup_kill(struct cgroup *cgrp)
lockdep_assert_held(&cgroup_mutex);

spin_lock_irq(&css_set_lock);
- set_bit(CGRP_KILL, &cgrp->flags);
+ cgrp->kill_seq++;
spin_unlock_irq(&css_set_lock);

css_task_iter_start(&cgrp->self, CSS_TASK_ITER_PROCS | CSS_TASK_ITER_THREADED, &it);
@@ -4029,10 +4029,6 @@ static void __cgroup_kill(struct cgroup *cgrp)
send_sig(SIGKILL, task, 0);
}
css_task_iter_end(&it);
-
- spin_lock_irq(&css_set_lock);
- clear_bit(CGRP_KILL, &cgrp->flags);
- spin_unlock_irq(&css_set_lock);
}

static void cgroup_kill(struct cgroup *cgrp)
@@ -6488,6 +6484,7 @@ static int cgroup_css_set_fork(struct kernel_clone_args *kargs)
spin_lock_irq(&css_set_lock);
cset = task_css_set(current);
get_css_set(cset);
+ kargs->kill_seq = kargs->cgrp->kill_seq;
spin_unlock_irq(&css_set_lock);

if (!(kargs->flags & CLONE_INTO_CGROUP)) {
@@ -6670,6 +6667,7 @@ void cgroup_post_fork(struct task_struct *child,
{
unsigned long cgrp_flags = 0;
bool kill = false;
+ unsigned long cgrp_kill_seq = 0;
struct cgroup_subsys *ss;
struct css_set *cset;
int i;
@@ -6681,10 +6679,13 @@ void cgroup_post_fork(struct task_struct *child,

/* init tasks are special, only link regular threads */
if (likely(child->pid)) {
- if (kargs->cgrp)
+ if (kargs->cgrp) {
cgrp_flags = kargs->cgrp->flags;
- else
+ cgrp_kill_seq = kargs->cgrp->kill_seq;
+ } else {
cgrp_flags = cset->dfl_cgrp->flags;
+ cgrp_kill_seq = cset->dfl_cgrp->kill_seq;
+ }

WARN_ON_ONCE(!list_empty(&child->cg_list));
cset->nr_tasks++;
@@ -6719,7 +6720,7 @@ void cgroup_post_fork(struct task_struct *child,
* child down right after we finished preparing it for
* userspace.
*/
- kill = test_bit(CGRP_KILL, &cgrp_flags);
+ kill = kargs->kill_seq != cgrp_kill_seq;
}

spin_unlock_irq(&css_set_lock);
--
2.43.5