[RFC] Get siginfo from unreaped task

From: Kees Cook
Date: Fri Feb 11 2022 - 23:28:54 EST


Make siginfo available through PTRACE_GETSIGINFO after process death,
without needing to have already used PTRACE_ATTACH. This uses 48 more
bytes in task_struct; is there somewhere else we could stash a copy of
the siginfo instead?

Signed-off-by: Kees Cook <keescook@xxxxxxxxxxxx>
---
include/linux/sched.h | 1 +
kernel/ptrace.c | 12 +-
kernel/signal.c | 4 +
tools/testing/selftests/seccomp/seccomp_bpf.c | 119 ++++++++++++++++++
4 files changed, 134 insertions(+), 2 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index f5b2be39a78c..e40789e801ef 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1178,6 +1178,7 @@ struct task_struct {
#endif
/* Ptrace state: */
unsigned long ptrace_message;
+ kernel_siginfo_t death_siginfo;
kernel_siginfo_t *last_siginfo;

struct task_io_accounting ioac;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index eea265082e97..990839c57842 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -1304,8 +1304,16 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr,

ret = ptrace_check_attach(child, request == PTRACE_KILL ||
request == PTRACE_INTERRUPT);
- if (ret < 0)
- goto out_put_task_struct;
+ if (ret < 0) {
+ /*
+ * Allow PTRACE_GETSIGINFO if process is dead
+ * and we could otherwise ptrace it.
+ */
+ if (request != PTRACE_GETSIGINFO ||
+ !child->exit_state ||
+ !ptrace_may_access(child, PTRACE_MODE_READ_REALCREDS))
+ goto out_put_task_struct;
+ }

ret = arch_ptrace(child, request, addr, data);
if (ret || request != PTRACE_DETACH)
diff --git a/kernel/signal.c b/kernel/signal.c
index 9b04631acde8..41f6ba6b7aa7 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2825,6 +2825,10 @@ bool get_signal(struct ksignal *ksig)
}

fatal:
+ /* Allow siginfo to be queried until reaped. */
+ copy_siginfo(&current->death_siginfo, &ksig->info);
+ current->last_siginfo = &current->death_siginfo;
+
spin_unlock_irq(&sighand->siglock);
if (unlikely(cgroup_task_frozen(current)))
cgroup_leave_frozen(true);
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 9d126d7fabdb..d2bbf9e32f22 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -268,6 +268,10 @@ struct seccomp_notif_addfd_big {
#define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4)
#endif

+#ifndef SYS_SECCOMP /* fallback for headers that do not provide it */
+#define SYS_SECCOMP 1 /* si_code the SIGINFO test expects alongside SIGSYS */
+#endif
+
#ifndef seccomp
int seccomp(unsigned int op, unsigned int flags, void *args)
{
@@ -765,6 +769,121 @@ TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
close(fd);
}

+FIXTURE(SIGINFO) { /* per-test state for the SIGINFO tests */
+ pid_t child_pid; /* fork() result; reset to 0 once the test has reaped it */
+};
+
+FIXTURE_SETUP(SIGINFO) /* runs before each SIGINFO test body */
+{
+ self->child_pid = 0; /* no child has been forked yet */
+}
+
+FIXTURE_TEARDOWN(SIGINFO) /* best-effort cleanup of a child the test left behind */
+{
+ if (self->child_pid > 0) /* still set: the test body did not reap the child */
+ waitpid(self->child_pid, NULL, WNOHANG); /* non-blocking reap attempt */
+}
+
+/*
+ * A child installs a filter that kills the process on prctl() and then
+ * trips it. Before reaping the child, the parent checks:
+ *  - the SIGCHLD siginfo reported by waitid(WNOWAIT),
+ *  - the SIGSYS siginfo reported by PTRACE_GETSIGINFO, issued without a
+ *    prior PTRACE_ATTACH,
+ *  - the wait status reported by the final, reaping waitpid().
+ */
+TEST_F(SIGINFO, child)
+{
+	int status, dumped;
+	siginfo_t info = { };
+	/* Kill only when calling __NR_prctl. */
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS | 0xBA),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+
+	self->child_pid = fork();
+	ASSERT_LE(0, self->child_pid);
+	if (self->child_pid == 0) {
+		/* Child: set no_new_privs, install the filter, then trip it. */
+		ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+			TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+		}
+		ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog));
+		/* Any prctl() now matches the filter's kill rule. */
+		prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
+		/* Should have died now. */
+		_exit(37);
+	}
+
+	/*
+	 * Peek at the exit notification without reaping (WNOWAIT) so the
+	 * zombie is still around for PTRACE_GETSIGINFO below. The SIGCHLD
+	 * siginfo is expected to carry:
+	 *   si_pid     the child
+	 *   si_code    CLD_KILLED or CLD_DUMPED
+	 *   si_status  the terminating signal number
+	 */
+	ASSERT_EQ(0, waitid(P_PID, self->child_pid, &info, WEXITED | WNOWAIT));
+	ASSERT_EQ(SIGCHLD, info.si_signo);
+	EXPECT_TRUE(info.si_code == CLD_KILLED || info.si_code == CLD_DUMPED);
+	EXPECT_EQ(0, info.si_errno);
+	EXPECT_EQ(self->child_pid, info.si_pid);
+
+	/*
+	 * For CLD_KILLED/CLD_DUMPED, si_status is the signal number itself,
+	 * not a wait(2)-style status word, so the W*() macros must not be
+	 * applied to it. Whether a core was dumped is reported through
+	 * si_code; remember it for comparison with the wait status below.
+	 */
+	EXPECT_EQ(SIGSYS, info.si_status);
+	dumped = (info.si_code == CLD_DUMPED);
+
+	/*
+	 * Fetch the siginfo of the fatal signal from the still-unreaped
+	 * child. For a seccomp kill it is expected to carry:
+	 *   si_signo      SIGSYS
+	 *   si_code       SYS_SECCOMP
+	 *   si_errno      data bits OR'ed into the filter return value
+	 *   si_call_addr  calling user instruction
+	 *   si_syscall    triggering system call number
+	 *   si_arch       AUDIT_ARCH_* of the syscall
+	 */
+	/* Zero first so stale waitid() fields cannot satisfy the checks. */
+	memset(&info, 0, sizeof(info));
+	ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, self->child_pid, NULL, &info));
+	ASSERT_EQ(SIGSYS, info.si_signo);
+	EXPECT_EQ(SYS_SECCOMP, info.si_code);
+	/*
+	 * The syscall will have happened somewhere near the libc
+	 * prctl implementation.
+	 */
+	EXPECT_TRUE(info.si_call_addr >= (void *)prctl &&
+		    info.si_call_addr <= (void *)prctl + PAGE_SIZE) {
+		TH_LOG("info.si_call_addr: %p", info.si_call_addr);
+		TH_LOG("prctl : %p", (void *)prctl);
+	}
+	EXPECT_EQ(0xBA, info.si_errno);
+	/* TODO: also check info.si_arch against the native AUDIT_ARCH_*. */
+	EXPECT_EQ(__NR_prctl, info.si_syscall);
+
+	/* Reap the child and check the wait status. */
+	ASSERT_EQ(self->child_pid, waitpid(self->child_pid, &status, 0));
+	ASSERT_TRUE(WIFSIGNALED(status));
+	/* Both interfaces must agree on whether a core was dumped. */
+	EXPECT_EQ(dumped, !!WCOREDUMP(status));
+	EXPECT_EQ(SIGSYS, WTERMSIG(status));
+	/* Already reaped: leave nothing for FIXTURE_TEARDOWN to wait on. */
+	self->child_pid = 0;
+}
+
/* This is a thread task to die via seccomp filter violation. */
void *kill_thread(void *data)
{
--
2.30.2