[PATCH 3.16 228/233] ptrace: use fsuid, fsgid, effective creds for fs access checks

From: Ben Hutchings
Date: Sat Sep 09 2017 - 18:29:23 EST


3.16.48-rc1 review patch. If anyone has any objections, please let me know.

------------------

From: Jann Horn <jann@xxxxxxxxx>

commit caaee6234d05a58c5b4d05e7bf766131b810a657 upstream.

By checking the effective credentials instead of the real UID / permitted
capabilities, ensure that the calling process actually intended to use its
credentials.

To ensure that all ptrace checks use the correct caller credentials (e.g.
in case out-of-tree code or newly added code omits the PTRACE_MODE_*CREDS
flag), use two new flags and require one of them to be set.

The problem was that when a privileged task had temporarily dropped its
privileges, e.g. by calling setreuid(0, user_uid), with the intent to
perform following syscalls with the credentials of a user, it still passed
ptrace access checks that the user would not be able to pass.

While an attacker should not be able to convince the privileged task to
perform a ptrace() syscall, this is a problem because the ptrace access
check is reused for things in procfs.

In particular, the following somewhat interesting procfs entries only rely
on ptrace access checks:

/proc/$pid/stat - uses the check for determining whether pointers
should be visible, useful for bypassing ASLR
/proc/$pid/maps - also useful for bypassing ASLR
/proc/$pid/cwd - useful for gaining access to restricted
directories that contain files with lax permissions, e.g. in
this scenario:
lrwxrwxrwx root root /proc/13020/cwd -> /root/foobar
drwx------ root root /root
drwxr-xr-x root root /root/foobar
-rw-r--r-- root root /root/foobar/secret

Therefore, on a system where a root-owned mode 6755 binary changes its
effective credentials as described and then dumps a user-specified file,
this could be used by an attacker to reveal the memory layout of root's
processes or reveal the contents of files he is not allowed to access
(through /proc/$pid/cwd).

[akpm@xxxxxxxxxxxxxxxxxxxx: fix warning]
Signed-off-by: Jann Horn <jann@xxxxxxxxx>
Acked-by: Kees Cook <keescook@xxxxxxxxxxxx>
Cc: Casey Schaufler <casey@xxxxxxxxxxxxxxxx>
Cc: Oleg Nesterov <oleg@xxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: James Morris <james.l.morris@xxxxxxxxxx>
Cc: "Serge E. Hallyn" <serge.hallyn@xxxxxxxxxx>
Cc: Andy Shevchenko <andriy.shevchenko@xxxxxxxxxxxxxxx>
Cc: Andy Lutomirski <luto@xxxxxxxxxx>
Cc: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
Cc: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx>
Cc: Willy Tarreau <w@xxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
[bwh: Backported to 3.16:
- Update mm_access() calls in fs/proc/task_{,no}mmu.c too
- Adjust context]
Signed-off-by: Ben Hutchings <ben@xxxxxxxxxxxxxxx>
---
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -391,7 +391,7 @@ static int do_task_stat(struct seq_file

state = *get_task_state(task);
vsize = eip = esp = 0;
- permitted = ptrace_may_access(task, PTRACE_MODE_READ | PTRACE_MODE_NOAUDIT);
+ permitted = ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS | PTRACE_MODE_NOAUDIT);
mm = get_task_mm(task);
if (mm) {
vsize = task_vsize(mm);
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -207,7 +207,7 @@ static int proc_pid_cmdline(struct task_

static int proc_pid_auxv(struct task_struct *task, char *buffer)
{
- struct mm_struct *mm = mm_access(task, PTRACE_MODE_READ);
+ struct mm_struct *mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
int res = PTR_ERR(mm);
if (mm && !IS_ERR(mm)) {
unsigned int nwords = 0;
@@ -236,7 +236,8 @@ static int proc_pid_wchan(struct task_st

wchan = get_wchan(task);

- if (wchan && ptrace_may_access(task, PTRACE_MODE_READ) && !lookup_symbol_name(wchan, symname))
+ if (wchan && ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)
+ && !lookup_symbol_name(wchan, symname))
return sprintf(buffer, "%s", symname);
else
return sprintf(buffer, "0");
@@ -248,7 +249,7 @@ static int lock_trace(struct task_struct
int err = mutex_lock_killable(&task->signal->cred_guard_mutex);
if (err)
return err;
- if (!ptrace_may_access(task, PTRACE_MODE_ATTACH)) {
+ if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) {
mutex_unlock(&task->signal->cred_guard_mutex);
return -EPERM;
}
@@ -522,7 +523,7 @@ static int proc_fd_access_allowed(struct
*/
task = get_proc_task(inode);
if (task) {
- allowed = ptrace_may_access(task, PTRACE_MODE_READ);
+ allowed = ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
put_task_struct(task);
}
return allowed;
@@ -557,7 +558,7 @@ static bool has_pid_permissions(struct p
return true;
if (in_group_p(pid->pid_gid))
return true;
- return ptrace_may_access(task, PTRACE_MODE_READ);
+ return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
}


@@ -672,7 +673,7 @@ static int __mem_open(struct inode *inod
if (!task)
return -ESRCH;

- mm = mm_access(task, mode);
+ mm = mm_access(task, mode | PTRACE_MODE_FSCREDS);
put_task_struct(task);

if (IS_ERR(mm))
@@ -1729,7 +1730,7 @@ static int map_files_d_revalidate(struct
if (!task)
goto out_notask;

- mm = mm_access(task, PTRACE_MODE_READ);
+ mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
if (IS_ERR_OR_NULL(mm))
goto out;

@@ -1864,7 +1865,7 @@ static struct dentry *proc_map_files_loo
goto out;

result = -EACCES;
- if (!ptrace_may_access(task, PTRACE_MODE_READ))
+ if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
goto out_put_task;

result = -ENOENT;
@@ -1921,7 +1922,7 @@ proc_map_files_readdir(struct file *file
goto out;

ret = -EACCES;
- if (!ptrace_may_access(task, PTRACE_MODE_READ))
+ if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
goto out_put_task;

ret = 0;
@@ -2400,7 +2401,7 @@ static int do_io_accounting(struct task_
if (result)
return result;

- if (!ptrace_may_access(task, PTRACE_MODE_READ)) {
+ if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) {
result = -EACCES;
goto out_unlock;
}
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -119,7 +119,7 @@ static void *proc_ns_follow_link(struct
if (!task)
goto out;

- if (!ptrace_may_access(task, PTRACE_MODE_READ))
+ if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
goto out_put_task;

ns_path.dentry = proc_ns_get_dentry(sb, task, ei->ns.ns_ops);
@@ -152,7 +152,7 @@ static int proc_ns_readlink(struct dentr
if (!task)
goto out;

- if (!ptrace_may_access(task, PTRACE_MODE_READ))
+ if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
goto out_put_task;

res = -ENOENT;
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -56,7 +56,29 @@ extern void exit_ptrace(struct task_stru
#define PTRACE_MODE_READ 0x01
#define PTRACE_MODE_ATTACH 0x02
#define PTRACE_MODE_NOAUDIT 0x04
-/* Returns true on success, false on denial. */
+#define PTRACE_MODE_FSCREDS 0x08
+#define PTRACE_MODE_REALCREDS 0x10
+
+/* shorthands for READ/ATTACH and FSCREDS/REALCREDS combinations */
+#define PTRACE_MODE_READ_FSCREDS (PTRACE_MODE_READ | PTRACE_MODE_FSCREDS)
+#define PTRACE_MODE_READ_REALCREDS (PTRACE_MODE_READ | PTRACE_MODE_REALCREDS)
+#define PTRACE_MODE_ATTACH_FSCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_FSCREDS)
+#define PTRACE_MODE_ATTACH_REALCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_REALCREDS)
+
+/**
+ * ptrace_may_access - check whether the caller is permitted to access
+ * a target task.
+ * @task: target task
+ * @mode: selects type of access and caller credentials
+ *
+ * Returns true on success, false on denial.
+ *
+ * One of the flags PTRACE_MODE_FSCREDS and PTRACE_MODE_REALCREDS must
+ * be set in @mode to specify whether the access was requested through
+ * a filesystem syscall (should use effective capabilities and fsuid
+ * of the caller) or through an explicit syscall such as
+ * process_vm_writev or ptrace (and should use the real credentials).
+ */
extern bool ptrace_may_access(struct task_struct *task, unsigned int mode);

static inline int ptrace_reparented(struct task_struct *child)
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3247,7 +3247,7 @@ find_lively_task_by_vpid(pid_t vpid)

/* Reuse ptrace permission checks for now. */
err = -EACCES;
- if (!ptrace_may_access(task, PTRACE_MODE_READ))
+ if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
goto errout;

return task;
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2797,7 +2797,7 @@ SYSCALL_DEFINE3(get_robust_list, int, pi
}

ret = -EPERM;
- if (!ptrace_may_access(p, PTRACE_MODE_READ))
+ if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
goto err_unlock;

head = p->robust_list;
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -155,7 +155,7 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list,
}

ret = -EPERM;
- if (!ptrace_may_access(p, PTRACE_MODE_READ))
+ if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
goto err_unlock;

head = p->compat_robust_list;
--- a/kernel/kcmp.c
+++ b/kernel/kcmp.c
@@ -122,8 +122,8 @@ SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t
&task2->signal->cred_guard_mutex);
if (ret)
goto err;
- if (!ptrace_may_access(task1, PTRACE_MODE_READ) ||
- !ptrace_may_access(task2, PTRACE_MODE_READ)) {
+ if (!ptrace_may_access(task1, PTRACE_MODE_READ_REALCREDS) ||
+ !ptrace_may_access(task2, PTRACE_MODE_READ_REALCREDS)) {
ret = -EPERM;
goto err_unlock;
}
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -231,6 +231,14 @@ static int ptrace_has_cap(struct user_na
static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
{
const struct cred *cred = current_cred(), *tcred;
+ int dumpable = 0;
+ kuid_t caller_uid;
+ kgid_t caller_gid;
+
+ if (!(mode & PTRACE_MODE_FSCREDS) == !(mode & PTRACE_MODE_REALCREDS)) {
+ WARN(1, "denying ptrace access check without PTRACE_MODE_*CREDS\n");
+ return -EPERM;
+ }

/* May we inspect the given task?
* This check is used both for attaching with ptrace
@@ -240,18 +248,33 @@ static int __ptrace_may_access(struct ta
* because setting up the necessary parent/child relationship
* or halting the specified task is impossible.
*/
- int dumpable = 0;
+
/* Don't let security modules deny introspection */
if (same_thread_group(task, current))
return 0;
rcu_read_lock();
+ if (mode & PTRACE_MODE_FSCREDS) {
+ caller_uid = cred->fsuid;
+ caller_gid = cred->fsgid;
+ } else {
+ /*
+ * Using the euid would make more sense here, but something
+ * in userland might rely on the old behavior, and this
+ * shouldn't be a security problem since
+ * PTRACE_MODE_REALCREDS implies that the caller explicitly
+ * used a syscall that requests access to another process
+ * (and not a filesystem syscall to procfs).
+ */
+ caller_uid = cred->uid;
+ caller_gid = cred->gid;
+ }
tcred = __task_cred(task);
- if (uid_eq(cred->uid, tcred->euid) &&
- uid_eq(cred->uid, tcred->suid) &&
- uid_eq(cred->uid, tcred->uid) &&
- gid_eq(cred->gid, tcred->egid) &&
- gid_eq(cred->gid, tcred->sgid) &&
- gid_eq(cred->gid, tcred->gid))
+ if (uid_eq(caller_uid, tcred->euid) &&
+ uid_eq(caller_uid, tcred->suid) &&
+ uid_eq(caller_uid, tcred->uid) &&
+ gid_eq(caller_gid, tcred->egid) &&
+ gid_eq(caller_gid, tcred->sgid) &&
+ gid_eq(caller_gid, tcred->gid))
goto ok;
if (ptrace_has_cap(tcred->user_ns, mode))
goto ok;
@@ -318,7 +341,7 @@ static int ptrace_attach(struct task_str
goto out;

task_lock(task);
- retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH);
+ retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS);
task_unlock(task);
if (retval)
goto unlock_creds;
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c
@@ -197,7 +197,7 @@ static ssize_t process_vm_rw_core(pid_t
goto free_proc_pages;
}

- mm = mm_access(task, PTRACE_MODE_ATTACH);
+ mm = mm_access(task, PTRACE_MODE_ATTACH_REALCREDS);
if (!mm || IS_ERR(mm)) {
rc = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH;
/*
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -142,12 +142,17 @@ int cap_ptrace_access_check(struct task_
{
int ret = 0;
const struct cred *cred, *child_cred;
+ const kernel_cap_t *caller_caps;

rcu_read_lock();
cred = current_cred();
child_cred = __task_cred(child);
+ if (mode & PTRACE_MODE_FSCREDS)
+ caller_caps = &cred->cap_effective;
+ else
+ caller_caps = &cred->cap_permitted;
if (cred->user_ns == child_cred->user_ns &&
- cap_issubset(child_cred->cap_permitted, cred->cap_permitted))
+ cap_issubset(child_cred->cap_permitted, *caller_caps))
goto out;
if (ns_capable(child_cred->user_ns, CAP_SYS_PTRACE))
goto out;
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -165,7 +165,7 @@ static void *m_start(struct seq_file *m,
if (!priv->task)
return ERR_PTR(-ESRCH);

- mm = mm_access(priv->task, PTRACE_MODE_READ);
+ mm = mm_access(priv->task, PTRACE_MODE_READ_FSCREDS);
if (!mm || IS_ERR(mm))
return mm;
down_read(&mm->mmap_sem);
@@ -1188,7 +1188,7 @@ static ssize_t pagemap_read(struct file
if (!pm.buffer)
goto out_task;

- mm = mm_access(task, PTRACE_MODE_READ);
+ mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
ret = PTR_ERR(mm);
if (!mm || IS_ERR(mm))
goto out_free;
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -216,7 +216,7 @@ static void *m_start(struct seq_file *m,
if (!priv->task)
return ERR_PTR(-ESRCH);

- mm = mm_access(priv->task, PTRACE_MODE_READ);
+ mm = mm_access(priv->task, PTRACE_MODE_READ_FSCREDS);
if (!mm || IS_ERR(mm)) {
put_task_struct(priv->task);
priv->task = NULL;