Re: [PATCH 1/2] perf daemon: Force waipid for all session on SIGCHLD delivery

From: Arnaldo Carvalho de Melo
Date: Wed Mar 24 2021 - 09:25:39 EST


Em Sat, Mar 20, 2021 at 11:10:12PM +0100, Jiri Olsa escreveu:
> If we don't process SIGCHLD before another comes, we will
> see just one SIGCHLD as a result. In this case current code
> will miss exit notification for a session and wait forever.
>
> Adding extra waitpid check for all sessions when SIGCHLD
> is received, to make sure we don't miss any session exit.
>
> Also fix close condition for signal_fd.

Thanks, applied.

- Arnaldo


> Reported-by: Ian Rogers <irogers@xxxxxxxxxx>
> Signed-off-by: Jiri Olsa <jolsa@xxxxxxxxxx>
> ---
> tools/perf/builtin-daemon.c | 50 +++++++++++++++++++++----------------
> 1 file changed, 28 insertions(+), 22 deletions(-)
>
> diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c
> index ace8772a4f03..4697493842f5 100644
> --- a/tools/perf/builtin-daemon.c
> +++ b/tools/perf/builtin-daemon.c
> @@ -402,35 +402,42 @@ static pid_t handle_signalfd(struct daemon *daemon)
> int status;
> pid_t pid;
>
> + /*
> + * Take signal fd data as pure signal notification and check all
> + * the sessions state. The reason is that multiple signals can get
> + * coalesced in kernel and we can receive only single signal even
> + * if multiple SIGCHLD were generated.
> + */
> err = read(daemon->signal_fd, &si, sizeof(struct signalfd_siginfo));
> - if (err != sizeof(struct signalfd_siginfo))
> + if (err != sizeof(struct signalfd_siginfo)) {
> + pr_err("failed to read signal fd\n");
> return -1;
> + }
>
> list_for_each_entry(session, &daemon->sessions, list) {
> + if (session->pid == -1)
> + continue;
>
> - if (session->pid != (int) si.ssi_pid)
> + pid = waitpid(session->pid, &status, WNOHANG);
> + if (pid <= 0)
> continue;
>
> - pid = waitpid(session->pid, &status, 0);
> - if (pid == session->pid) {
> - if (WIFEXITED(status)) {
> - pr_info("session '%s' exited, status=%d\n",
> - session->name, WEXITSTATUS(status));
> - } else if (WIFSIGNALED(status)) {
> - pr_info("session '%s' killed (signal %d)\n",
> - session->name, WTERMSIG(status));
> - } else if (WIFSTOPPED(status)) {
> - pr_info("session '%s' stopped (signal %d)\n",
> - session->name, WSTOPSIG(status));
> - } else {
> - pr_info("session '%s' Unexpected status (0x%x)\n",
> - session->name, status);
> - }
> + if (WIFEXITED(status)) {
> + pr_info("session '%s' exited, status=%d\n",
> + session->name, WEXITSTATUS(status));
> + } else if (WIFSIGNALED(status)) {
> + pr_info("session '%s' killed (signal %d)\n",
> + session->name, WTERMSIG(status));
> + } else if (WIFSTOPPED(status)) {
> + pr_info("session '%s' stopped (signal %d)\n",
> + session->name, WSTOPSIG(status));
> + } else {
> + pr_info("session '%s' Unexpected status (0x%x)\n",
> + session->name, status);
> }
>
> session->state = KILL;
> session->pid = -1;
> - return pid;
> }
>
> return 0;
> @@ -443,7 +450,6 @@ static int daemon_session__wait(struct daemon_session *session, struct daemon *d
> .fd = daemon->signal_fd,
> .events = POLLIN,
> };
> - pid_t wpid = 0, pid = session->pid;
> time_t start;
>
> start = time(NULL);
> @@ -452,7 +458,7 @@ static int daemon_session__wait(struct daemon_session *session, struct daemon *d
> int err = poll(&pollfd, 1, 1000);
>
> if (err > 0) {
> - wpid = handle_signalfd(daemon);
> + handle_signalfd(daemon);
> } else if (err < 0) {
> perror("failed: poll\n");
> return -1;
> @@ -460,7 +466,7 @@ static int daemon_session__wait(struct daemon_session *session, struct daemon *d
>
> if (start + secs < time(NULL))
> return -1;
> - } while (wpid != pid);
> + } while (session->pid != -1);
>
> return 0;
> }
> @@ -1344,7 +1350,7 @@ static int __cmd_start(struct daemon *daemon, struct option parent_options[],
> close(sock_fd);
> if (conf_fd != -1)
> close(conf_fd);
> - if (conf_fd != -1)
> + if (signal_fd != -1)
> close(signal_fd);
>
> pr_info("daemon exited\n");
> --
> 2.30.2
>

--

- Arnaldo