Re: [PATCH v2 bpf-next 2/4] selftests/bpf: add non-BPF_LSM test for task local storage

From: Song Liu
Date: Wed Jan 27 2021 - 16:47:02 EST




> On Jan 27, 2021, at 1:21 PM, Andrii Nakryiko <andrii.nakryiko@xxxxxxxxx> wrote:
>
> On Tue, Jan 26, 2021 at 1:21 AM Song Liu <songliubraving@xxxxxx> wrote:
>>
>> Task local storage is enabled for tracing programs. Add two tests for
>> task local storage without CONFIG_BPF_LSM.
>>
>> The first test measures the duration of a syscall by storing sys_enter
>> time in task local storage.
>>
>> The second test checks whether the kernel allows allocating task local
>> storage in exit_creds() (which it should not).
>>
>> Signed-off-by: Song Liu <songliubraving@xxxxxx>
>> ---
>> .../bpf/prog_tests/task_local_storage.c | 85 +++++++++++++++++++
>> .../selftests/bpf/progs/task_local_storage.c | 56 ++++++++++++
>> .../bpf/progs/task_local_storage_exit_creds.c | 32 +++++++
>> 3 files changed, 173 insertions(+)
>> create mode 100644 tools/testing/selftests/bpf/prog_tests/task_local_storage.c
>> create mode 100644 tools/testing/selftests/bpf/progs/task_local_storage.c
>> create mode 100644 tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c
>>
>> diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
>> new file mode 100644
>> index 0000000000000..a8e2d3a476145
>> --- /dev/null
>> +++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
>> @@ -0,0 +1,85 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/* Copyright (c) 2021 Facebook */
>> +
>> +#include <sys/types.h>
>> +#include <unistd.h>
>> +#include <test_progs.h>
>> +#include "task_local_storage.skel.h"
>> +#include "task_local_storage_exit_creds.skel.h"
>> +
>> +static unsigned int duration;
>> +
>> +static void check_usleep_duration(struct task_local_storage *skel,
>> + __u64 time_us)
>> +{
>> + __u64 syscall_duration;
>> +
>> + usleep(time_us);
>> +
>> + /* save syscall_duration measure in usleep() */
>> + syscall_duration = skel->bss->syscall_duration;
>> +
>> + /* time measured by the BPF program (in nanoseconds) should be
>> + * within +/- 20% of time_us * 1000.
>> + */
>> + CHECK(syscall_duration < 800 * time_us, "syscall_duration",
>> + "syscall_duration was too small\n");
>> + CHECK(syscall_duration > 1200 * time_us, "syscall_duration",
>> + "syscall_duration was too big\n");
>
> this is going to be very flaky, especially in Travis CI. Can you
> please use something more stable that doesn't rely on time?

Let me try.

>
>> +}
>> +
>> +static void test_syscall_duration(void)
>> +{
>> + struct task_local_storage *skel;
>> + int err;
>> +
>> + skel = task_local_storage__open_and_load();
>> + if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
>> + return;
>> +
>> + skel->bss->target_pid = getpid();
>
> you are getting process ID, but comparing it with thread ID in BPF
> code. It will stop working properly if/when tests will be run in
> separate threads, so please use gettid() instead.

Will fix.

>
>> +
>> + err = task_local_storage__attach(skel);
>> + if (!ASSERT_OK(err, "skel_attach"))
>> + goto out;
>> +
>> + check_usleep_duration(skel, 2000);
>> + check_usleep_duration(skel, 3000);
>> + check_usleep_duration(skel, 4000);
>> +
>> +out:
>> + task_local_storage__destroy(skel);
>> +}
>> +
>> +static void test_exit_creds(void)
>> +{
>> + struct task_local_storage_exit_creds *skel;
>> + int err;
>> +
>> + skel = task_local_storage_exit_creds__open_and_load();
>> + if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
>> + return;
>> +
>> + err = task_local_storage_exit_creds__attach(skel);
>> + if (!ASSERT_OK(err, "skel_attach"))
>> + goto out;
>> +
>> + /* trigger at least one exit_creds() */
>> + if (CHECK_FAIL(system("ls > /dev/null")))
>> + goto out;
>> +
>> + /* sync rcu, so the following reads could get latest values */
>> + kern_sync_rcu();
>
> what are we waiting for here? you don't detach anything... system() is
> definitely going to complete by now, so whatever counter was or was
> not updated will be reflected here. Seems like kern_sync_rcu() is not
> needed?

IIUC, without sync_ruc(), even system() is finished, the kernel may not
have called exit_creds() for the "ls" task yet. Then the following check
for null_ptr_count != 0 would fail.

>
>> + ASSERT_EQ(skel->bss->valid_ptr_count, 0, "valid_ptr_count");
>> + ASSERT_NEQ(skel->bss->null_ptr_count, 0, "null_ptr_count");
>> +out:
>> + task_local_storage_exit_creds__destroy(skel);
>> +}
>> +
>> +void test_task_local_storage(void)
>> +{
>> + if (test__start_subtest("syscall_duration"))
>> + test_syscall_duration();
>> + if (test__start_subtest("exit_creds"))
>> + test_exit_creds();
>> +}
>
> [...]
>
>> +int valid_ptr_count = 0;
>> +int null_ptr_count = 0;
>> +
>> +SEC("fentry/exit_creds")
>> +int BPF_PROG(trace_exit_creds, struct task_struct *task)
>> +{
>> + __u64 *ptr;
>> +
>> + ptr = bpf_task_storage_get(&task_storage, task, 0,
>> + BPF_LOCAL_STORAGE_GET_F_CREATE);
>> + if (ptr)
>> + valid_ptr_count++;
>> + else
>> + null_ptr_count++;
>
>
> use atomic increments?

Do you mean __sync_fetch_and_add()?

>
>> + return 0;
>> +}
>> --
>> 2.24.1