Re: [PATCH v3 3/4] selftests/bpf: Add tests verifying bpf lsm userns_create hook

From: Christian Brauner
Date: Fri Jul 22 2022 - 04:15:25 EST


On Thu, Jul 21, 2022 at 12:28:07PM -0500, Frederick Lawler wrote:
> The LSM hook userns_create was introduced to provide LSM's an
> opportunity to block or allow unprivileged user namespace creation. This
> test serves two purposes: it provides a test eBPF implementation, and
> tests the hook successfully blocks or allows user namespace creation.
>
> This tests 4 cases:
>
> 1. Unattached bpf program does not block unpriv user namespace
> creation.
> 2. Attached bpf program allows user namespace creation given
> CAP_SYS_ADMIN privileges.
> 3. Attached bpf program denies user namespace creation for a
> user without CAP_SYS_ADMIN.
> 4. The sleepable implementation loads

Sounds good!

>
> Signed-off-by: Frederick Lawler <fred@xxxxxxxxxxxxxx>
>
> ---
> The generic deny_namespace file name is used for future namespace
> expansion. I didn't want to limit these files to just the create_user_ns
> hook.
> Changes since v2:
> - Rename create_user_ns hook to userns_create
> Changes since v1:
> - Introduce this patch
> ---
> .../selftests/bpf/prog_tests/deny_namespace.c | 88 +++++++++++++++++++
> .../selftests/bpf/progs/test_deny_namespace.c | 39 ++++++++
> 2 files changed, 127 insertions(+)
> create mode 100644 tools/testing/selftests/bpf/prog_tests/deny_namespace.c
> create mode 100644 tools/testing/selftests/bpf/progs/test_deny_namespace.c
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/deny_namespace.c b/tools/testing/selftests/bpf/prog_tests/deny_namespace.c
> new file mode 100644
> index 000000000000..9e4714295008
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/prog_tests/deny_namespace.c
> @@ -0,0 +1,88 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#define _GNU_SOURCE
> +#include <test_progs.h>
> +#include "test_deny_namespace.skel.h"
> +#include <sched.h>
> +#include "cap_helpers.h"
> +
> +#define STACK_SIZE (1024 * 1024)
> +static char child_stack[STACK_SIZE];
> +
> +int clone_callback(void *arg)
> +{
> + return 0;
> +}
> +
> +static int create_new_user_ns(void)
> +{
> + int status;
> + pid_t cpid;
> +
> + cpid = clone(clone_callback, child_stack + STACK_SIZE,
> + CLONE_NEWUSER | SIGCHLD, NULL);
> +
> + if (cpid == -1)
> + return errno;
> +
> + if (cpid == 0)
> + return 0;

Martin asked about this already but fwiw, this cannot happen with
clone(). Unlike fork(), the clone() wrapper function doesn't return
twice: the child starts executing in the callback, and the caller always
gets back either the PID of the child process or an error.

> +
> + waitpid(cpid, &status, 0);
> + if (WIFEXITED(status))
> + return WEXITSTATUS(status);
> +
> + return -1;
> +}

You can also just avoid the clone() dance and simply do something like:

/*
 * Reap child @pid and report how it ended.
 *
 * waitpid() is retried for as long as it fails with EINTR.
 *
 * Returns the child's exit status if it exited normally, or -1 if
 * waitpid() failed for any other reason or the child did not exit
 * normally.
 */
static int wait_for_pid(pid_t pid)
{
	int wstatus;

	while (waitpid(pid, &wstatus, 0) == -1) {
		/* Only an interrupted wait is worth retrying. */
		if (errno != EINTR)
			return -1;
	}

	return WIFEXITED(wstatus) ? WEXITSTATUS(wstatus) : -1;
}

/*
 * Attempt to create a user namespace from a forked child, so the
 * calling process itself is never moved into a new namespace.
 *
 * Return value:
 *   < 0  - some internal error (fork() failed, or the child could not
 *          be waited for / did not exit normally)
 *   > 0  - userns creation failed (child exited with EXIT_FAILURE)
 *   0    - userns creation succeeded
 */
static int create_user_ns(void)
{
	pid_t child = fork();

	/* Parent: collect the child's verdict. */
	if (child > 0)
		return wait_for_pid(child);

	if (child < 0)
		return -1;

	/* Child: the exit code tells the parent whether unshare() worked. */
	_exit(unshare(CLONE_NEWUSER) ? EXIT_FAILURE : EXIT_SUCCESS);
}

Either approach works, since both codepaths hit the right spot in the kernel.

> +
> +static void test_userns_create_bpf(void)
> +{
> + __u32 cap_mask = 1ULL << CAP_SYS_ADMIN;
> + __u64 old_caps = 0;
> +
> + ASSERT_OK(create_new_user_ns(), "priv new user ns");
> +
> + cap_disable_effective(cap_mask, &old_caps);
> +
> + ASSERT_EQ(create_new_user_ns(), EPERM, "unpriv new user ns");
> +
> + if (cap_mask & old_caps)
> + cap_enable_effective(cap_mask, NULL);
> +}
> +
> +static void test_unpriv_userns_create_no_bpf(void)
> +{
> + __u32 cap_mask = 1ULL << CAP_SYS_ADMIN;
> + __u64 old_caps = 0;
> +
> + cap_disable_effective(cap_mask, &old_caps);
> +
> + ASSERT_OK(create_new_user_ns(), "no-bpf unpriv new user ns");
> +
> + if (cap_mask & old_caps)
> + cap_enable_effective(cap_mask, NULL);
> +}
> +
> +void test_deny_namespace(void)
> +{
> + struct test_deny_namespace *skel = NULL;
> + int err;
> +
> + if (test__start_subtest("unpriv_userns_create_no_bpf"))
> + test_unpriv_userns_create_no_bpf();
> +
> + skel = test_deny_namespace__open_and_load();
> + if (!ASSERT_OK_PTR(skel, "skel load"))
> + goto close_prog;
> +
> + err = test_deny_namespace__attach(skel);
> + if (!ASSERT_OK(err, "attach"))
> + goto close_prog;
> +
> + if (test__start_subtest("userns_create_bpf"))
> + test_userns_create_bpf();
> +
> + test_deny_namespace__detach(skel);
> +
> +close_prog:
> + test_deny_namespace__destroy(skel);
> +}
> diff --git a/tools/testing/selftests/bpf/progs/test_deny_namespace.c b/tools/testing/selftests/bpf/progs/test_deny_namespace.c
> new file mode 100644
> index 000000000000..9ec9dabc8372
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/test_deny_namespace.c
> @@ -0,0 +1,39 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <linux/bpf.h>
> +#include <bpf/bpf_helpers.h>
> +#include <bpf/bpf_tracing.h>
> +#include <errno.h>
> +#include <linux/capability.h>
> +
> +struct kernel_cap_struct {
> + __u32 cap[_LINUX_CAPABILITY_U32S_3];
> +} __attribute__((preserve_access_index));
> +
> +struct cred {
> + struct kernel_cap_struct cap_effective;
> +} __attribute__((preserve_access_index));
> +
> +char _license[] SEC("license") = "GPL";
> +
> +SEC("lsm/userns_create")
> +int BPF_PROG(test_userns_create, const struct cred *cred, int ret)
> +{
> + struct kernel_cap_struct caps = cred->cap_effective;
> + int cap_index = CAP_TO_INDEX(CAP_SYS_ADMIN);
> + __u32 cap_mask = CAP_TO_MASK(CAP_SYS_ADMIN);
> +
> + if (ret)
> + return 0;
> +
> + ret = -EPERM;
> + if (caps.cap[cap_index] & cap_mask)
> + return 0;
> +
> + return -EPERM;
> +}

Looks nice and simple.
Acked-by: Christian Brauner (Microsoft) <brauner@xxxxxxxxxx>