[PATCH] pidfd: getfd should always report ESRCH if a task is exiting

From: Tycho Andersen
Date: Tue Feb 06 2024 - 11:43:38 EST


From: Tycho Andersen <tandersen@xxxxxxxxxxx>

We can get EBADF from __pidfd_fget() if a task is currently exiting, which
might be confusing. Let's check PF_EXITING, and just report ESRCH if so.

I chose PF_EXITING, because it is set in exit_signals(), which is called
before exit_files(). Since ->exit_status is mostly set after exit_files()
in exit_notify(), using that still leaves a window open for the race.

Signed-off-by: Tycho Andersen <tandersen@xxxxxxxxxxx>
---
kernel/pid.c | 2 +-
.../selftests/pidfd/pidfd_getfd_test.c | 31 ++++++++++++++++++-
2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/kernel/pid.c b/kernel/pid.c
index de0bf2f8d18b..db8731f0ee45 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -688,7 +688,7 @@ static int pidfd_getfd(struct pid *pid, int fd)
int ret;

task = get_pid_task(pid, PIDTYPE_PID);
- if (!task)
+ if (!task || task->flags & PF_EXITING)
return -ESRCH;

file = __pidfd_fget(task, fd);
diff --git a/tools/testing/selftests/pidfd/pidfd_getfd_test.c b/tools/testing/selftests/pidfd/pidfd_getfd_test.c
index 0930e2411dfb..cd51d547b751 100644
--- a/tools/testing/selftests/pidfd/pidfd_getfd_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_getfd_test.c
@@ -5,6 +5,7 @@
#include <fcntl.h>
#include <limits.h>
#include <linux/types.h>
+#include <poll.h>
#include <sched.h>
#include <signal.h>
#include <stdio.h>
@@ -129,6 +130,7 @@ FIXTURE(child)
* When it is closed, the child will exit.
*/
int sk;
+ bool ignore_child_result;
};

FIXTURE_SETUP(child)
@@ -165,10 +167,14 @@ FIXTURE_SETUP(child)

FIXTURE_TEARDOWN(child)
{
+ int ret;
+
EXPECT_EQ(0, close(self->pidfd));
EXPECT_EQ(0, close(self->sk));

- EXPECT_EQ(0, wait_for_pid(self->pid));
+ ret = wait_for_pid(self->pid);
+ if (!self->ignore_child_result)
+ EXPECT_EQ(0, ret);
}

TEST_F(child, disable_ptrace)
@@ -235,6 +241,29 @@ TEST(flags_set)
EXPECT_EQ(errno, EINVAL);
}

+TEST_F(child, no_strange_EBADF)
+{
+ struct pollfd fds;
+
+ self->ignore_child_result = true;
+
+ fds.fd = self->pidfd;
+ fds.events = POLLIN;
+
+ ASSERT_EQ(kill(self->pid, SIGKILL), 0);
+ ASSERT_EQ(poll(&fds, 1, 5000), 1);
+
+ /*
+ * It used to be that pidfd_getfd() could race with the exiting thread
+ * between exit_files() and release_task(), and get a non-null task
+ * with a NULL files struct, and you'd get EBADF, which was slightly
+ * confusing.
+ */
+ errno = 0;
+ EXPECT_EQ(sys_pidfd_getfd(self->pidfd, self->remote_fd, 0), -1);
+ EXPECT_EQ(errno, ESRCH);
+}
+
#if __NR_pidfd_getfd == -1
int main(void)
{

base-commit: 082d11c164aef02e51bcd9c7cbf1554a8e42d9b5
--
2.34.1