Re: [PATCH 5/6] seccomp: add a way to attach a filter via eBPF fd

From: Kees Cook
Date: Fri Sep 04 2015 - 16:41:49 EST


On Fri, Sep 4, 2015 at 9:04 AM, Tycho Andersen
<tycho.andersen@xxxxxxxxxxxxx> wrote:
> This is the final bit needed to support seccomp filters created via the bpf
> syscall.
>
> One concern with this patch is exactly what the interface should look like
> for users, since seccomp()'s second argument is a pointer, we could ask
> people to pass a pointer to the fd, but implies we might write to it which
> seems impolite. Right now we cast the pointer (and force the user to cast
> it), which generates ugly warnings. I'm not sure what the right answer is
> here.
>
> Signed-off-by: Tycho Andersen <tycho.andersen@xxxxxxxxxxxxx>
> CC: Kees Cook <keescook@xxxxxxxxxxxx>
> CC: Will Drewry <wad@xxxxxxxxxxxx>
> CC: Oleg Nesterov <oleg@xxxxxxxxxx>
> CC: Andy Lutomirski <luto@xxxxxxxxxxxxxx>
> CC: Pavel Emelyanov <xemul@xxxxxxxxxxxxx>
> CC: Serge E. Hallyn <serge.hallyn@xxxxxxxxxx>
> CC: Alexei Starovoitov <ast@xxxxxxxxxx>
> CC: Daniel Borkmann <daniel@xxxxxxxxxxxxx>
> ---
> include/linux/seccomp.h | 3 +-
> include/uapi/linux/seccomp.h | 1 +
> kernel/seccomp.c | 70 ++++++++++++++++++++++++++++++++++++--------
> 3 files changed, 61 insertions(+), 13 deletions(-)
>
> diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
> index d1a86ed..a725dd5 100644
> --- a/include/linux/seccomp.h
> +++ b/include/linux/seccomp.h
> @@ -3,7 +3,8 @@
>
> #include <uapi/linux/seccomp.h>
>
> -#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC)
> +#define SECCOMP_FILTER_FLAG_MASK (\
> + SECCOMP_FILTER_FLAG_TSYNC | SECCOMP_FILTER_FLAG_EBPF)
>
> #ifdef CONFIG_SECCOMP
>
> diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
> index 0f238a4..c29a423 100644
> --- a/include/uapi/linux/seccomp.h
> +++ b/include/uapi/linux/seccomp.h
> @@ -16,6 +16,7 @@
>
> /* Valid flags for SECCOMP_SET_MODE_FILTER */
> #define SECCOMP_FILTER_FLAG_TSYNC 1
> +#define SECCOMP_FILTER_FLAG_EBPF (1 << 1)
>
> /*
> * All BPF programs must return a 32-bit value.
> diff --git a/kernel/seccomp.c b/kernel/seccomp.c
> index a2c5b32..9c6bea6 100644
> --- a/kernel/seccomp.c
> +++ b/kernel/seccomp.c
> @@ -355,17 +355,6 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
>
> BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter));
>
> - /*
> - * Installing a seccomp filter requires that the task has
> - * CAP_SYS_ADMIN in its namespace or be running with no_new_privs.
> - * This avoids scenarios where unprivileged tasks can affect the
> - * behavior of privileged children.
> - */
> - if (!task_no_new_privs(current) &&
> - security_capable_noaudit(current_cred(), current_user_ns(),
> - CAP_SYS_ADMIN) != 0)
> - return ERR_PTR(-EACCES);
> -
> /* Allocate a new seccomp_filter */
> sfilter = kzalloc(sizeof(*sfilter), GFP_KERNEL | __GFP_NOWARN);
> if (!sfilter)
> @@ -509,6 +498,48 @@ static void seccomp_send_sigsys(int syscall, int reason)
> info.si_syscall = syscall;
> force_sig_info(SIGSYS, &info, current);
> }
> +
> +#ifdef CONFIG_BPF_SYSCALL
> +static struct seccomp_filter *seccomp_prepare_ebpf(const char __user *filter)
> +{
> + /* XXX: this cast generates a warning. should we make people pass in
> + * &fd, or is there some nicer way of doing this?
> + */
> + u32 fd = (u32) filter;

I think this is probably the right way to do it, modulo getting the
warning fixed. Let me invoke the great linux-api subscribers to get
some more opinions.

tl;dr: adding SECCOMP_FILTER_FLAG_EBPF to the flags changes the
pointer argument into an fd argument. Is this sane, should it be a
pointer to an fd, or should it not be a flag at all, creating a new
seccomp command instead (SECCOMP_MODE_FILTER_EBPF)?

-Kees

> + struct seccomp_filter *ret;
> + struct bpf_prog *prog;
> +
> + prog = bpf_prog_get(fd);
> + if (IS_ERR(prog))
> + return (struct seccomp_filter *) prog;
> +
> + if (prog->type != BPF_PROG_TYPE_SECCOMP) {
> + bpf_prog_put(prog);
> + return ERR_PTR(-EINVAL);
> + }
> +
> + ret = kzalloc(sizeof(*ret), GFP_KERNEL | __GFP_NOWARN);
> + if (!ret) {
> + bpf_prog_put(prog);
> + return ERR_PTR(-ENOMEM);
> + }
> +
> + ret->prog = prog;
> + atomic_set(&ret->usage, 1);
> +
> + /* Intentionally don't bpf_prog_put() here, because the underlying prog
> + * is refcounted too and we're holding a reference from the struct
> + * seccomp_filter object.
> + */
> +
> + return ret;
> +}
> +#else
> +static struct seccomp_filter *seccomp_prepare_ebpf(const char __user *filter)
> +{
> + return ERR_PTR(-EINVAL);
> +}
> +#endif
> #endif /* CONFIG_SECCOMP_FILTER */
>
> /*
> @@ -775,8 +806,23 @@ static long seccomp_set_mode_filter(unsigned int flags,
> if (flags & ~SECCOMP_FILTER_FLAG_MASK)
> return -EINVAL;
>
> + /*
> + * Installing a seccomp filter requires that the task has
> + * CAP_SYS_ADMIN in its namespace or be running with no_new_privs.
> + * This avoids scenarios where unprivileged tasks can affect the
> + * behavior of privileged children.
> + */
> + if (!task_no_new_privs(current) &&
> + security_capable_noaudit(current_cred(), current_user_ns(),
> + CAP_SYS_ADMIN) != 0)
> + return -EACCES;
> +
> /* Prepare the new filter before holding any locks. */
> - prepared = seccomp_prepare_user_filter(filter);
> + if (flags & SECCOMP_FILTER_FLAG_EBPF)
> + prepared = seccomp_prepare_ebpf(filter);
> + else
> + prepared = seccomp_prepare_user_filter(filter);
> +
> if (IS_ERR(prepared))
> return PTR_ERR(prepared);
>
> --
> 2.1.4
>



--
Kees Cook
Chrome OS Security
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/