Re: [GIT PULL] pidfd fixes

From: Christian Brauner
Date: Tue Jul 23 2019 - 06:25:52 EST


On Tue, Jul 23, 2019 at 12:12:49PM +0200, Oleg Nesterov wrote:
> On 07/22, Linus Torvalds wrote:
> >
> > So if we set EXIT_ZOMBIE early, then I think we should change the
> > EXIT_DEAD case too. IOW, do something like this on top:
> >
> > --- a/kernel/exit.c
> > +++ b/kernel/exit.c
> > @@ -734,9 +734,10 @@ static void exit_notify(struct task_struct
> > *tsk, int group_dead)
> > autoreap = true;
> > }
> >
> > - tsk->exit_state = autoreap ? EXIT_DEAD : EXIT_ZOMBIE;
> > - if (tsk->exit_state == EXIT_DEAD)
> > + if (autoreap) {
> > + tsk->exit_state = EXIT_DEAD;
> > list_add(&tsk->ptrace_entry, &dead);
> > + }
>
> Yes, this needs cleanups. Actually I was going to suggest another change
> below, this way do_notify_pidfd() is only called when it is really needed.
> But then I decided a trivial one-liner makes more sense for the start.

Yeah, that was my thinking exactly.

>
> I'll try to think. Perhaps we should also change do_notify_parent() to set
> p->exit_state, at least if autoreap. Then the early exit_state = EXIT_ZOMBIE
> won't look so confusing and we can do more (minor) cleanups.

You mind sending that as a proper patch?
I also have a few more cleanups for other parts I intend to send later
this week. I'd pick this one up too.

>
> Oleg.
>
> --- x/kernel/exit.c
> +++ x/kernel/exit.c
> @@ -182,6 +182,13 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
> put_task_struct(tsk);
> }
>
> +static void do_notify_pidfd(struct task_struct *task)
> +{
> + struct pid *pid;
> +
> + pid = task_pid(task);
> + wake_up_all(&pid->wait_pidfd);
> +}
>
> void release_task(struct task_struct *p)
> {
> @@ -218,6 +225,8 @@ void release_task(struct task_struct *p)
> zap_leader = do_notify_parent(leader, leader->exit_signal);
> if (zap_leader)
> leader->exit_state = EXIT_DEAD;
> +
> + do_notify_pidfd(leader);
> }
>
> write_unlock_irq(&tasklist_lock);
> @@ -710,7 +719,7 @@ static void forget_original_parent(struct task_struct *father,
> */
> static void exit_notify(struct task_struct *tsk, int group_dead)
> {
> - bool autoreap;
> + bool autoreap, xxx;

In case you don't mind the length of it how about:

bool autoreap, leading_empty_thread_group;

It's not the prettiest names but having rather descriptive var names in
these codepaths seems like a good idea to me.
It also reads very naturally further below:

if (leading_empty_thread_group || unlikely(tsk->ptrace)) {
int sig = leading_empty_thread_group && !ptrace_reparented(tsk)
? tsk->exit_signal : SIGCHLD;

> struct task_struct *p, *n;
> LIST_HEAD(dead);
>
> @@ -720,23 +729,22 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
> if (group_dead)
> kill_orphaned_pgrp(tsk->group_leader, NULL);
>
> - if (unlikely(tsk->ptrace)) {
> - int sig = thread_group_leader(tsk) &&
> - thread_group_empty(tsk) &&
> - !ptrace_reparented(tsk) ?
> - tsk->exit_signal : SIGCHLD;
> + autoreap = true;
> + xxx = thread_group_leader(tsk) && thread_group_empty(tsk);
> +
> + if (xxx || unlikely(tsk->ptrace)) {
> + int sig = xxx && !ptrace_reparented(tsk)
> + ? tsk->exit_signal : SIGCHLD;
> autoreap = do_notify_parent(tsk, sig);
> - } else if (thread_group_leader(tsk)) {
> - autoreap = thread_group_empty(tsk) &&
> - do_notify_parent(tsk, tsk->exit_signal);
> - } else {
> - autoreap = true;
> }
>
> tsk->exit_state = autoreap ? EXIT_DEAD : EXIT_ZOMBIE;
> if (tsk->exit_state == EXIT_DEAD)
> list_add(&tsk->ptrace_entry, &dead);
>
> + if (xxx)
> + do_notify_pidfd(tsk);
> +
> /* mt-exec, de_thread() is waiting for group leader */
> if (unlikely(tsk->signal->notify_count < 0))
> wake_up_process(tsk->signal->group_exit_task);
> --- x/kernel/signal.c
> +++ x/kernel/signal.c
> @@ -1881,14 +1881,6 @@ int send_sigqueue(struct sigqueue *q, struct pid *pid, enum pid_type type)
> return ret;
> }
>
> -static void do_notify_pidfd(struct task_struct *task)
> -{
> - struct pid *pid;
> -
> - pid = task_pid(task);
> - wake_up_all(&pid->wait_pidfd);
> -}
> -
> /*
> * Let a parent know about the death of a child.
> * For a stopped/continued status change, use do_notify_parent_cldstop instead.
> @@ -1912,9 +1904,6 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
> BUG_ON(!tsk->ptrace &&
> (tsk->group_leader != tsk || !thread_group_empty(tsk)));
>
> - /* Wake up all pidfd waiters */
> - do_notify_pidfd(tsk);
> -
> if (sig != SIGCHLD) {
> /*
> * This is only possible if parent == real_parent.
>