Re: [PATCH v2 1/2] KEYS: use synchronous task work for changing parent credentials

From: Paul Moore
Date: Tue Sep 10 2024 - 17:08:08 EST


On Aug 5, 2024 Jann Horn <jannh@xxxxxxxxxx> wrote:
>
> keyctl_session_to_parent() involves posting task work to the parent task,
> with work function key_change_session_keyring.
> Because the task work in the parent runs asynchronously, no errors can be
> returned back to the caller of keyctl_session_to_parent(), and therefore
> the work function key_change_session_keyring() can't be allowed to fail due
> to things like memory allocation failure or permission checks - all
> allocations and checks have to happen in the child.
>
> This is annoying for two reasons:
>
> - It is the only reason why cred_alloc_blank() and
> security_transfer_creds() are necessary.
> - It means we can't do synchronous permission checks.
>
> Rewrite keyctl_session_to_parent() to run task work on the parent
> synchronously, so that any errors that happen in the task work can be
> plumbed back into the syscall return value in the child.
> This allows us to get rid of cred_alloc_blank() and
> security_transfer_creds() in a later commit, and it will make it possible
> to write more reliable security checks for this operation.
>
> Note that this requires using TWA_SIGNAL instead of TWA_RESUME, so the
> parent might observe some spurious -EAGAIN syscall returns or such; but the
> parent likely anyway has to be ready to deal with the side effects of
> receiving signals (since it'll probably get SIGCHLD when the child dies),
> so that probably isn't an issue.
>
> Signed-off-by: Jann Horn <jannh@xxxxxxxxxx>
> ---
> security/keys/internal.h | 8 ++++
> security/keys/keyctl.c | 107 +++++++++++++------------------------------
> security/keys/process_keys.c | 86 ++++++++++++++++++----------------
> 3 files changed, 87 insertions(+), 114 deletions(-)

...

> diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
> index ab927a142f51..e4cfe5c4594a 100644
> --- a/security/keys/keyctl.c
> +++ b/security/keys/keyctl.c
> @@ -1616,104 +1616,63 @@ long keyctl_get_security(key_serial_t keyid,
> * parent process.
> *
> * The keyring must exist and must grant the caller LINK permission, and the
> * parent process must be single-threaded and must have the same effective
> * ownership as this process and mustn't be SUID/SGID.
> *
> - * The keyring will be emplaced on the parent when it next resumes userspace.
> + * The keyring will be emplaced on the parent via a pseudo-signal.
> *
> * If successful, 0 will be returned.
> */
> long keyctl_session_to_parent(void)
> {
> - struct task_struct *me, *parent;
> - const struct cred *mycred, *pcred;
> - struct callback_head *newwork, *oldwork;
> + struct keyctl_session_to_parent_context ctx;
> + struct task_struct *parent;
> key_ref_t keyring_r;
> - struct cred *cred;
> int ret;
>
> keyring_r = lookup_user_key(KEY_SPEC_SESSION_KEYRING, 0, KEY_NEED_LINK);
> if (IS_ERR(keyring_r))
> return PTR_ERR(keyring_r);
>
> - ret = -ENOMEM;
> -
> - /* our parent is going to need a new cred struct, a new tgcred struct
> - * and new security data, so we allocate them here to prevent ENOMEM in
> - * our parent */
> - cred = cred_alloc_blank();
> - if (!cred)
> - goto error_keyring;
> - newwork = &cred->rcu;
> + write_lock_irq(&tasklist_lock);
> + parent = get_task_struct(rcu_dereference_protected(current->real_parent,
> + lockdep_is_held(&tasklist_lock)));
> + write_unlock_irq(&tasklist_lock);
>
> - cred->session_keyring = key_ref_to_ptr(keyring_r);
> - keyring_r = NULL;
> - init_task_work(newwork, key_change_session_keyring);
> + /* the parent mustn't be init and mustn't be a kernel thread */
> + if (is_global_init(parent) || (READ_ONCE(parent->flags) & PF_KTHREAD) != 0)
> + goto put_task;

I think we need to explicitly set @ret if we are failing here, yes?

> - me = current;
> - rcu_read_lock();
> - write_lock_irq(&tasklist_lock);
> + ctx.new_session_keyring = key_ref_to_ptr(keyring_r);
> + ctx.child_cred = current_cred();
> + init_completion(&ctx.done);
> + init_task_work(&ctx.work, key_change_session_keyring);
> + ret = task_work_add(parent, &ctx.work, TWA_SIGNAL);
> + if (ret)
> + goto put_task;
>
> - ret = -EPERM;
> - oldwork = NULL;
> - parent = rcu_dereference_protected(me->real_parent,
> - lockdep_is_held(&tasklist_lock));
> + ret = wait_for_completion_interruptible(&ctx.done);
>
> - /* the parent mustn't be init and mustn't be a kernel thread */
> - if (parent->pid <= 1 || !parent->mm)
> - goto unlock;
> -
> - /* the parent must be single threaded */
> - if (!thread_group_empty(parent))
> - goto unlock;
> -
> - /* the parent and the child must have different session keyrings or
> - * there's no point */
> - mycred = current_cred();
> - pcred = __task_cred(parent);
> - if (mycred == pcred ||
> - mycred->session_keyring == pcred->session_keyring) {
> - ret = 0;
> - goto unlock;
> + if (task_work_cancel(parent, &ctx.work)) {
> + /*
> + * We got interrupted and the task work was canceled before it
> + * could execute.
> + * Use -ERESTARTNOINTR instead of -ERESTARTSYS for
> + * compatibility - the manpage does not list -EINTR as a
> + * possible error for keyctl().
> + */
> + ret = -ERESTARTNOINTR;
> + } else {
> + /* task work is running or has been executed */
> + wait_for_completion(&ctx.done);
> + ret = ctx.result;
> }
>
> - /* the parent must have the same effective ownership and mustn't be
> - * SUID/SGID */
> - if (!uid_eq(pcred->uid, mycred->euid) ||
> - !uid_eq(pcred->euid, mycred->euid) ||
> - !uid_eq(pcred->suid, mycred->euid) ||
> - !gid_eq(pcred->gid, mycred->egid) ||
> - !gid_eq(pcred->egid, mycred->egid) ||
> - !gid_eq(pcred->sgid, mycred->egid))
> - goto unlock;
> -
> - /* the keyrings must have the same UID */
> - if ((pcred->session_keyring &&
> - !uid_eq(pcred->session_keyring->uid, mycred->euid)) ||
> - !uid_eq(mycred->session_keyring->uid, mycred->euid))
> - goto unlock;
> -
> - /* cancel an already pending keyring replacement */
> - oldwork = task_work_cancel_func(parent, key_change_session_keyring);
> -
> - /* the replacement session keyring is applied just prior to userspace
> - * restarting */
> - ret = task_work_add(parent, newwork, TWA_RESUME);
> - if (!ret)
> - newwork = NULL;
> -unlock:
> - write_unlock_irq(&tasklist_lock);
> - rcu_read_unlock();
> - if (oldwork)
> - put_cred(container_of(oldwork, struct cred, rcu));
> - if (newwork)
> - put_cred(cred);
> - return ret;
> -
> -error_keyring:
> +put_task:
> + put_task_struct(parent);
> key_ref_put(keyring_r);
> return ret;
> }

--
paul-moore.com