[PATCH RFC 1/2] clone: add CLONE_AUTOREAP
From: Christian Brauner
Date: Mon Feb 16 2026 - 08:53:36 EST
Add a new clone3() flag, CLONE_AUTOREAP, that causes a child process to
be auto-reaped on exit without ever becoming a zombie. This is a
per-process property, in contrast to the existing auto-reap mechanism
via SA_NOCLDWAIT or SIG_IGN for SIGCHLD, which applies to all children
of a given parent.
Currently the only way to automatically reap children is to set
SA_NOCLDWAIT or SIG_IGN on SIGCHLD. This is a parent-scoped property
affecting all children, which makes it unsuitable for libraries or
applications that need selective auto-reaping of specific children while
still being able to wait() on others.
CLONE_AUTOREAP stores an autoreap flag in the child's signal_struct.
When the child exits and is not being ptraced, do_notify_parent() sees
this flag, suppresses the signal, and returns autoreap=true, causing
exit_notify() to transition the task directly to EXIT_DEAD. Since the
flag lives on the child, it survives reparenting: if the original parent
exits and the child is reparented to a subreaper or init, the child
still auto-reaps when it eventually exits.
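CLONE_AUTOREAP requires CLONE_PIDFD because the process will never be
visible to wait(). The parent must use the pidfd to monitor exit via
poll() and retrieve the exit status via PIDFD_GET_INFO. No exit signal
is delivered, so exit_signal must be zero. CLONE_AUTOREAP also cannot be
combined with CLONE_THREAD. Violating any of these constraints makes
clone3() fail with EINVAL.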
The flag is not inherited by the autoreap process's own children. Each
child that should be autoreaped must be explicitly created with
CLONE_AUTOREAP.
Link: https://github.com/uapi-group/kernel-features/issues/45
Signed-off-by: Christian Brauner <brauner@xxxxxxxxxx>
---
include/linux/sched/signal.h | 1 +
include/uapi/linux/sched.h | 1 +
kernel/fork.c | 16 +++++++++++++++-
kernel/ptrace.c | 3 ++-
kernel/signal.c | 4 ++++
5 files changed, 23 insertions(+), 2 deletions(-)
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 7d6449982822..346ecbad4c2b 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -132,6 +132,7 @@ struct signal_struct {
*/
unsigned int is_child_subreaper:1;
unsigned int has_child_subreaper:1;
+ unsigned int autoreap:1;
#ifdef CONFIG_POSIX_TIMERS
diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
index 359a14cc76a4..e6fc5ae621e2 100644
--- a/include/uapi/linux/sched.h
+++ b/include/uapi/linux/sched.h
@@ -36,6 +36,7 @@
/* Flags for the clone3() syscall. */
#define CLONE_CLEAR_SIGHAND 0x100000000ULL /* Clear any signal handler and reset to SIG_DFL. */
#define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */
+#define CLONE_AUTOREAP 0x400000000ULL /* Auto-reap child on exit, requires CLONE_PIDFD. */
/*
* cloning flags intersect with CSIGNAL so can be used with unshare and clone3
diff --git a/kernel/fork.c b/kernel/fork.c
index 9c5effbdbdc1..a803bdad2805 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2028,6 +2028,15 @@ __latent_entropy struct task_struct *copy_process(
return ERR_PTR(-EINVAL);
}
+ if (clone_flags & CLONE_AUTOREAP) {
+ if (!(clone_flags & CLONE_PIDFD))
+ return ERR_PTR(-EINVAL);
+ if (clone_flags & CLONE_THREAD)
+ return ERR_PTR(-EINVAL);
+ if (args->exit_signal)
+ return ERR_PTR(-EINVAL);
+ }
+
/*
* Force any signals received before this point to be delivered
* before the fork happens. Collect up signals sent to multiple
@@ -2374,6 +2383,8 @@ __latent_entropy struct task_struct *copy_process(
p->parent_exec_id = current->parent_exec_id;
if (clone_flags & CLONE_THREAD)
p->exit_signal = -1;
+ else if (clone_flags & CLONE_AUTOREAP)
+ p->exit_signal = 0;
else
p->exit_signal = current->group_leader->exit_signal;
} else {
@@ -2435,6 +2446,8 @@ __latent_entropy struct task_struct *copy_process(
*/
p->signal->has_child_subreaper = p->real_parent->signal->has_child_subreaper ||
p->real_parent->signal->is_child_subreaper;
+ if (clone_flags & CLONE_AUTOREAP)
+ p->signal->autoreap = 1;
list_add_tail(&p->sibling, &p->real_parent->children);
list_add_tail_rcu(&p->tasks, &init_task.tasks);
attach_pid(p, PIDTYPE_TGID);
@@ -2897,7 +2910,8 @@ static bool clone3_args_valid(struct kernel_clone_args *kargs)
{
/* Verify that no unknown flags are passed along. */
if (kargs->flags &
- ~(CLONE_LEGACY_FLAGS | CLONE_CLEAR_SIGHAND | CLONE_INTO_CGROUP))
+ ~(CLONE_LEGACY_FLAGS | CLONE_CLEAR_SIGHAND | CLONE_INTO_CGROUP |
+ CLONE_AUTOREAP))
return false;
/*
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 392ec2f75f01..68c17daef8d4 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -549,7 +549,8 @@ static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p)
if (!dead && thread_group_empty(p)) {
if (!same_thread_group(p->real_parent, tracer))
dead = do_notify_parent(p, p->exit_signal);
- else if (ignoring_children(tracer->sighand)) {
+ else if (ignoring_children(tracer->sighand) ||
+ p->signal->autoreap) {
__wake_up_parent(p, tracer);
dead = true;
}
diff --git a/kernel/signal.c b/kernel/signal.c
index e42b8bd6922f..2fb206c84c07 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2251,6 +2251,10 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
if (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN)
sig = 0;
}
+ if (!tsk->ptrace && tsk->signal->autoreap) {
+ autoreap = true;
+ sig = 0;
+ }
/*
* Send with __send_signal as si_pid and si_uid are in the
* parent's namespaces.
--
2.47.3