[PATCH v3] Implement /proc/pid/kill

From: Daniel Colascione
Date: Wed Oct 31 2018 - 11:59:25 EST


Add a simple proc-based kill interface. To use /proc/pid/kill, just
write the signal number in base-10 ASCII to the kill file of the
process to be killed: for example, 'echo 9 > /proc/$$/kill'.

Semantically, /proc/pid/kill works like kill(2), except that the
process ID comes from the proc filesystem context instead of from an
explicit system call parameter. This way, it's possible to avoid races
between inspecting some aspect of a process and that process's PID
being reused for some other process.

Note that the write(2) to the kill file descriptor works only if it
happens in the same security context as the call to open(2), where
"security context" is defined as the user namespace, the set of all
ambient user IDs (effective uid, fs uid, real uid, and saved uid), and
the presence of the CAP_KILL capability. This check prevents confused
deputy attacks via, e.g., supplying a /proc/$(pidof httpd)/kill file
descriptor as the standard output of a setuid program and convincing
that program to write a "9".

With /proc/pid/kill, it's possible to write a proper race-free and
safe pkill(1). An approximation follows. A real program might use
openat(2), having opened a process's /proc/pid directory explicitly,
with the directory file descriptor serving as a sort of "process
handle".

#!/bin/bash
# Approximate pkill(1): send signal 15 to every process whose argv[0]
# contains the pattern given as $1.
set -euo pipefail
pat=$1
for proc_status in /proc/*/status; do (
    # Work inside the process's /proc directory so that "cmdline" and
    # "kill" are resolved relative to it. errexit is ignored inside a
    # '( ... ) ||' list, so bail out explicitly if the cd fails.
    cd "$(dirname "$proc_status")" || exit
    # cmdline is NUL-separated. Options must precede the array name, and
    # the empty delimiter must be a separate word for -d to receive it.
    readarray -d '' proc_argv < cmdline
    if ((${#proc_argv[@]} > 0)) &&
       [[ ${proc_argv[0]} = *$pat* ]];
    then
        echo 15 > kill
    fi
) || true; done

Signed-off-by: Daniel Colascione <dancol@xxxxxxxxxx>
---

Turns out that checking struct user isn't sufficient, since signal.c's
permissions check also cares about effective UIDs. Let's be
extra-paranoid and bail if _anything_ relevant in struct cred
has changed.

Also, as Joel suggested, switch from goto-return to direct return.

fs/proc/base.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 67 insertions(+)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 7e9f07bf260d..b0e7ded96af9 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -205,6 +205,72 @@ static int proc_root_link(struct dentry *dentry, struct path *path)
return result;
}

+/*
+ * Write handler for /proc/<pid>/kill.  Parses a base-10 signal number
+ * from @buf and sends it to the process behind this proc inode.
+ * Returns @count on success or a negative errno on failure.
+ */
+static ssize_t proc_pid_kill_write(struct file *file,
+				   const char __user *buf,
+				   size_t count, loff_t *ppos)
+{
+	ssize_t res;
+	int sig;
+	char buffer[4];
+	const struct cred *cur_cred;
+	const struct cred *open_cred;
+	bool security_changed;
+
+	/* This check prevents a confused deputy attack in which an
+	 * unprivileged process opens /proc/victim/kill and convinces
+	 * a privileged process to write to that kill FD, effectively
+	 * performing a kill with the privileges of the unwitting
+	 * privileged process. Here, we just fail the kill operation
+	 * if the writer's user namespace, any of its user IDs, or its
+	 * CAP_KILL status differs from those of the kill FD's opener.
+	 */
+	cur_cred = current_cred();
+	open_cred = file->f_cred;
+	security_changed =
+		cur_cred->user_ns != open_cred->user_ns ||
+		!uid_eq(cur_cred->euid, open_cred->euid) ||
+		!uid_eq(cur_cred->fsuid, open_cred->fsuid) ||
+		!uid_eq(cur_cred->suid, open_cred->suid) ||
+		!uid_eq(cur_cred->uid, open_cred->uid) ||
+		/* No audit: the actual kill audits any capability use.
+		 * Compare zero/non-zero only, not raw return values.
+		 */
+		(!security_capable_noaudit(cur_cred, cur_cred->user_ns,
+					   CAP_KILL) !=
+		 !security_capable_noaudit(open_cred, open_cred->user_ns,
+					   CAP_KILL));
+	if (security_changed)
+		return -EPERM;
+
+	/* Only offset 0 is accepted; leave room for the NUL below. */
+	if (*ppos != 0 || count > sizeof(buffer) - 1)
+		return -EINVAL;
+
+	/* A faulting user buffer is reported as -EFAULT. */
+	if (copy_from_user(buffer, buf, count))
+		return -EFAULT;
+
+	buffer[count] = '\0';
+	res = kstrtoint(strstrip(buffer), 10, &sig);
+	if (res)
+		return res;
+
+	res = kill_pid(proc_pid(file_inode(file)), sig, 0);
+	if (res)
+		return res;
+
+	return count;
+}
+
+static const struct file_operations proc_pid_kill_ops = {
+ .write = proc_pid_kill_write, /* parses a signal number and sends it */
+};
+
static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
size_t count, loff_t *ppos)
{
@@ -2935,6 +3001,7 @@ static const struct pid_entry tgid_base_stuff[] = {
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
ONE("syscall", S_IRUSR, proc_pid_syscall),
#endif
+ REG("kill", S_IRUGO | S_IWUGO, proc_pid_kill_ops),
REG("cmdline", S_IRUGO, proc_pid_cmdline_ops),
ONE("stat", S_IRUGO, proc_tgid_stat),
ONE("statm", S_IRUGO, proc_pid_statm),
--
2.19.1.568.g152ad8e336-goog