Re: [PATCH 3/4] seccomp: Add SECCOMP_USER_NOTIF_FLAG_PIDFD to get pidfd on listener trap

From: Aleksa Sarai
Date: Sat Jan 25 2020 - 23:03:43 EST


On 2020-01-24, Sargun Dhillon <sargun@xxxxxxxxx> wrote:
> This introduces the capability for users of seccomp's listener behaviour
> to be able to receive the pidfd of the process that triggered the event.
> Currently, this just opens the group leader of the thread that triggere
> the event, as pidfds (currently) are limited to group leaders.
>
> For actions which do not act on the process outside of the pidfd, there
> is then no need to check the cookie to ensure validity of the request
> throughout the listener's handling of it.
>
> This can be extended later on as well when pidfd capabilities are added
> to be able to have the listener imbue the pidfd with certain capabilities
> when it is delivered to userspace.
>
> It is the responsibility of the user to close the pidfd.
>
> Signed-off-by: Sargun Dhillon <sargun@xxxxxxxxx>
> ---
> include/uapi/linux/seccomp.h | 4 +++
> kernel/seccomp.c | 68 ++++++++++++++++++++++++++++++++----
> 2 files changed, 66 insertions(+), 6 deletions(-)
>
> diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
> index be84d87f1f46..64f6fc5c95f1 100644
> --- a/include/uapi/linux/seccomp.h
> +++ b/include/uapi/linux/seccomp.h
> @@ -69,11 +69,15 @@ struct seccomp_notif_sizes {
> __u16 seccomp_data;
> };
>
> +/* Valid flags for struct seccomp_notif */
> +#define SECCOMP_USER_NOTIF_FLAG_PIDFD (1UL << 0) /* populate pidfd */
> +
> struct seccomp_notif {
> __u64 id;
> __u32 pid;
> __u32 flags;
> struct seccomp_data data;
> + __u32 pidfd;
> };
>
> /*
> diff --git a/kernel/seccomp.c b/kernel/seccomp.c
> index b6ea3dcb57bf..93f9cf45ce07 100644
> --- a/kernel/seccomp.c
> +++ b/kernel/seccomp.c
> @@ -1019,21 +1019,61 @@ static int seccomp_notify_release(struct inode *inode, struct file *file)
> return 0;
> }
>
> +
> +static long __seccomp_notify_recv_pidfd(void __user *buf,
> + struct seccomp_notif *unotif,
> + struct task_struct *group_leader)
> +{
> + struct file *pidfd_file;
> + struct pid *pid;
> + int fd;
> +
> + pid = get_task_pid(group_leader, PIDTYPE_PID);
> + pidfd_file = pidfd_create_file(pid);
> + put_pid(pid);
> + if (IS_ERR(pidfd_file))
> + return PTR_ERR(pidfd_file);
> +
> + fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);

You don't need to pass O_RDWR -- only O_CLOEXEC is checked by
get_unused_fd_flags().

> + if (fd < 0) {
> + fput(pidfd_file);
> + return fd;
> + }
> +
> + unotif->pidfd = fd;
> +
> + if (copy_to_user(buf, unotif, sizeof(*unotif))) {
> + put_unused_fd(fd);
> + fput(pidfd_file);
> + return -EFAULT;
> + }
> +
> + fd_install(fd, pidfd_file);
> +
> + return 0;
> +}
> +
> static long seccomp_notify_recv(struct seccomp_filter *filter,
> void __user *buf)
> {
> struct seccomp_knotif *knotif = NULL, *cur;
> struct seccomp_notif unotif;
> + struct task_struct *group_leader;
> + bool send_pidfd;
> ssize_t ret;
>
> + if (copy_from_user(&unotif, buf, sizeof(unotif)))
> + return -EFAULT;
> /* Verify that we're not given garbage to keep struct extensible. */
> - ret = check_zeroed_user(buf, sizeof(unotif));
> - if (ret < 0)
> - return ret;
> - if (!ret)
> + if (unotif.id ||
> + unotif.pid ||
> + memchr_inv(&unotif.data, 0, sizeof(unotif.data)) ||
> + unotif.pidfd)
> + return -EINVAL;

IMHO this check is more confusing than the original check_zeroed_user().
Something like the following is simpler and less prone to forgetting to
add a new field in the future:

if (memchr_inv(&unotif, 0, sizeof(unotif)))
return -EINVAL;

> + if (unotif.flags & ~(SECCOMP_USER_NOTIF_FLAG_PIDFD))
> return -EINVAL;
>
> - memset(&unotif, 0, sizeof(unotif));
> + send_pidfd = unotif.flags & SECCOMP_USER_NOTIF_FLAG_PIDFD;
>
> ret = down_interruptible(&filter->notif->request);
> if (ret < 0)
> @@ -1057,9 +1097,13 @@ static long seccomp_notify_recv(struct seccomp_filter *filter,
> goto out;
> }
>
> + memset(&unotif, 0, sizeof(unotif));
> +
> unotif.id = knotif->id;
> unotif.pid = task_pid_vnr(knotif->task);
> unotif.data = *(knotif->data);
> + if (send_pidfd)
> + group_leader = get_task_struct(knotif->task->group_leader);
>
> knotif->state = SECCOMP_NOTIFY_SENT;
> wake_up_poll(&filter->notif->wqh, EPOLLOUT | EPOLLWRNORM);
> @@ -1067,9 +1111,21 @@ static long seccomp_notify_recv(struct seccomp_filter *filter,
> out:
> mutex_unlock(&filter->notify_lock);
>
> - if (ret == 0 && copy_to_user(buf, &unotif, sizeof(unotif))) {
> + if (ret)
> + return ret;
> +
> + /*
> + * We've successfully received a notification, let's try to copy it to
> + * userspace.
> + */
> + if (send_pidfd) {
> + ret = __seccomp_notify_recv_pidfd(buf, &unotif, group_leader);
> + put_task_struct(group_leader);
> + } else if (copy_to_user(buf, &unotif, sizeof(unotif))) {
> ret = -EFAULT;
> + }

To my eye, the way this helper is used is a bit ugly -- my first
question when reading this was "why aren't we doing a copy_to_user() for
pidfds?".

Something like the following might be a bit cleaner I think:

struct file *pidfd_file = NULL;

if (send_pidfd) {
// helper allocates the pidfd_file and sets unotify->fd
ret = __seccomp_notify_recv_pidfd(&unotify, &pidfd_file)
if (ret)
goto err; // or whatever
}

if (copy_to_user(buf, &unotif, sizeof(unotif))) {
ret = -EFAULT;
goto err; // or whatever
}

if (send_pidfd)
fd_install(unotif.fd, pidfd_file)

But to be fair, this is also somewhat ugly too.

--
Aleksa Sarai
Senior Software Engineer (Containers)
SUSE Linux GmbH
<https://www.cyphar.com/>

Attachment: signature.asc
Description: PGP signature