Re: [PATCH 3/4] seccomp: Introduce SECCOMP_ATTACH_FILTER operation

From: Kees Cook
Date: Tue Oct 10 2023 - 20:22:47 EST


On Mon, Oct 09, 2023 at 12:40:45PM +0000, Hengqi Chen wrote:
> The SECCOMP_ATTACH_FILTER operation is used to attach
> a loaded filter to the current process. The loaded filter
> is represented by a fd which is either returned by the
> SECCOMP_LOAD_FILTER operation or obtained from bpffs using
> bpf syscall.
>
> Signed-off-by: Hengqi Chen <hengqi.chen@xxxxxxxxx>
> ---
> include/uapi/linux/seccomp.h | 1 +
> kernel/seccomp.c | 68 +++++++++++++++++++++++++++++++++---
> 2 files changed, 64 insertions(+), 5 deletions(-)
>
> diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
> index ee2c83697810..fbe30262fdfc 100644
> --- a/include/uapi/linux/seccomp.h
> +++ b/include/uapi/linux/seccomp.h
> @@ -17,6 +17,7 @@
> #define SECCOMP_GET_ACTION_AVAIL 2
> #define SECCOMP_GET_NOTIF_SIZES 3
> #define SECCOMP_LOAD_FILTER 4
> +#define SECCOMP_ATTACH_FILTER 5
>
> /* Valid flags for SECCOMP_SET_MODE_FILTER */
> #define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
> diff --git a/kernel/seccomp.c b/kernel/seccomp.c
> index 3ae43db3b642..9f9d8a7a1d6e 100644
> --- a/kernel/seccomp.c
> +++ b/kernel/seccomp.c
> @@ -523,7 +523,10 @@ static inline pid_t seccomp_can_sync_threads(void)
> static inline void seccomp_filter_free(struct seccomp_filter *filter)
> {
> if (filter) {
> - bpf_prog_destroy(filter->prog);
> + if (filter->prog->type == BPF_PROG_TYPE_SECCOMP)
> + bpf_prog_put(filter->prog);
> + else
> + bpf_prog_destroy(filter->prog);
> kfree(filter);
> }
> }
> @@ -894,7 +897,7 @@ static void seccomp_cache_prepare(struct seccomp_filter *sfilter)
> #endif /* SECCOMP_ARCH_NATIVE */
>
> /**
> - * seccomp_attach_filter: validate and attach filter
> + * seccomp_do_attach_filter: validate and attach filter
> * @flags: flags to change filter behavior
> * @filter: seccomp filter to add to the current process
> *
> @@ -905,8 +908,8 @@ static void seccomp_cache_prepare(struct seccomp_filter *sfilter)
> * seccomp mode or did not have an ancestral seccomp filter
> * - in NEW_LISTENER mode: the fd of the new listener
> */
> -static long seccomp_attach_filter(unsigned int flags,
> - struct seccomp_filter *filter)
> +static long seccomp_do_attach_filter(unsigned int flags,
> + struct seccomp_filter *filter)
> {
> unsigned long total_insns;
> struct seccomp_filter *walker;
> @@ -2001,7 +2004,7 @@ static long seccomp_set_mode_filter(unsigned int flags,
> goto out;
> }
>
> - ret = seccomp_attach_filter(flags, prepared);
> + ret = seccomp_do_attach_filter(flags, prepared);
> if (ret)
> goto out;
> /* Do not free the successfully attached filter. */
> @@ -2058,6 +2061,51 @@ static long seccomp_load_filter(const char __user *filter)
> bpf_prog_put(prog);
> return ret;
> }
> +
> +static long seccomp_attach_filter(const char __user *ufd)
> +{
> + const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
> + struct seccomp_filter *sfilter;
> + struct bpf_prog *prog;
> + int flags = 0;
> + int fd, ret;
> +
> + if (copy_from_user(&fd, ufd, sizeof(fd)))
> + return -EFAULT;
> +
> + prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SECCOMP);
> + if (IS_ERR(prog))
> + return PTR_ERR(prog);
> +
> + sfilter = kzalloc(sizeof(*sfilter), GFP_KERNEL | __GFP_NOWARN);
> + if (!sfilter) {
> + bpf_prog_put(prog);
> + return -ENOMEM;
> + }
> +
> + sfilter->prog = prog;
> + refcount_set(&sfilter->refs, 1);
> + refcount_set(&sfilter->users, 1);
> + mutex_init(&sfilter->notify_lock);
> + init_waitqueue_head(&sfilter->wqh);
> +
> + spin_lock_irq(&current->sighand->siglock);
> +
> + ret = -EINVAL;
> + if (!seccomp_may_assign_mode(seccomp_mode))
> + goto out;
> +
> + ret = seccomp_do_attach_filter(flags, sfilter);
> + if (ret)
> + goto out;
> +
> + sfilter = NULL;
> + seccomp_assign_mode(current, seccomp_mode, flags);
> +out:
> + spin_unlock_irq(&current->sighand->siglock);
> + seccomp_filter_free(sfilter);
> + return ret;
> +}

This is duplicating part of seccomp_set_mode_filter() but without
handling flags at all. This isn't really workable, since we need things
like TSYNC, etc. I think it would be better to adjust
SECCOMP_SET_MODE_FILTER to take a new flag that indicates that the user
arg is an fd, not a filter. Then the middle of seccomp_set_mode_filter()
can choosen between seccomp_prepare_user_filter() and a wrapped call to
bpf_prog_get_type() on the fd, etc.

--
Kees Cook