Re: [PATCH v3 bpf-next 3/3] selftests/bpf: add test for bpf_get_branch_snapshot

From: Andrii Nakryiko
Date: Mon Aug 30 2021 - 18:28:46 EST


On Mon, Aug 30, 2021 at 2:44 PM Song Liu <songliubraving@xxxxxx> wrote:
>
> This test uses bpf_get_branch_snapshot from a fexit program. The test uses
> a target kernel function (bpf_fexit_loop_test1) and compares the record
> against kallsyms. If there isn't enough record matching kallsyms, the
> test fails.
>
> Signed-off-by: Song Liu <songliubraving@xxxxxx>
> ---
> net/bpf/test_run.c | 15 ++-
> .../bpf/prog_tests/get_branch_snapshot.c | 106 ++++++++++++++++++
> .../selftests/bpf/progs/get_branch_snapshot.c | 41 +++++++
> tools/testing/selftests/bpf/trace_helpers.c | 30 +++++
> tools/testing/selftests/bpf/trace_helpers.h | 5 +
> 5 files changed, 196 insertions(+), 1 deletion(-)
> create mode 100644 tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c
> create mode 100644 tools/testing/selftests/bpf/progs/get_branch_snapshot.c
>
> diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
> index 2eb0e55ef54d2..6cc179a532c9c 100644
> --- a/net/bpf/test_run.c
> +++ b/net/bpf/test_run.c
> @@ -231,6 +231,18 @@ struct sock * noinline bpf_kfunc_call_test3(struct sock *sk)
> return sk;
> }
>
> +noinline int bpf_fexit_loop_test1(int n)

We have bpf_testmod as part of selftests now, which allows us to add
whatever in-kernel functionality we need, without polluting the kernel
itself. fentry/fexit attach to kernel functions works as well, so do
you think we can use that here for testing?

> +{
> + int i, sum = 0;
> +
> + /* the primary goal of this test is to test LBR. Create a lot of
> + * branches in the function, so we can catch it easily.
> + */
> + for (i = 0; i < n; i++)
> + sum += i;
> + return sum;
> +}
> +
> __diag_pop();
>
> ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO);
> @@ -293,7 +305,8 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog,
> bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 ||
> bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111 ||
> bpf_fentry_test7((struct bpf_fentry_test_t *)0) != 0 ||
> - bpf_fentry_test8(&arg) != 0)
> + bpf_fentry_test8(&arg) != 0 ||
> + bpf_fexit_loop_test1(101) != 5050)
> goto out;
> break;
> case BPF_MODIFY_RETURN:
> diff --git a/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c b/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c
> new file mode 100644
> index 0000000000000..9bb16826418fb
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c
> @@ -0,0 +1,106 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2021 Facebook */
> +#include <test_progs.h>
> +#include "get_branch_snapshot.skel.h"
> +
> +static int *pfd_array;
> +static int cpu_cnt;
> +
> +static int create_perf_events(void)
> +{
> + struct perf_event_attr attr = {0};
> + int cpu;
> +
> + /* create perf event */
> + attr.size = sizeof(attr);
> + attr.type = PERF_TYPE_RAW;
> + attr.config = 0x1b00;
> + attr.sample_type = PERF_SAMPLE_BRANCH_STACK;
> + attr.branch_sample_type = PERF_SAMPLE_BRANCH_KERNEL |
> + PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_ANY;
> +
> + cpu_cnt = libbpf_num_possible_cpus();
> + pfd_array = malloc(sizeof(int) * cpu_cnt);
> + if (!pfd_array) {
> + cpu_cnt = 0;
> + return 1;
> + }
> +
> + for (cpu = 0; cpu < libbpf_num_possible_cpus(); cpu++) {

nit: use cpu_cnt from above?

> + pfd_array[cpu] = syscall(__NR_perf_event_open, &attr,
> + -1, cpu, -1, PERF_FLAG_FD_CLOEXEC);
> + if (pfd_array[cpu] < 0)
> + break;
> + }
> +
> + return cpu == 0;
> +}
> +
> +static void close_perf_events(void)
> +{
> + int cpu = 0;
> + int fd;
> +
> + while (cpu++ < cpu_cnt) {
> + fd = pfd_array[cpu];
> + if (fd < 0)
> + break;
> + close(fd);
> + }
> + free(pfd_array);
> +}
> +
> +void test_get_branch_snapshot(void)
> +{
> + struct get_branch_snapshot *skel;
> + int err, prog_fd;
> + __u32 retval;
> +
> + if (create_perf_events()) {
> + test__skip(); /* system doesn't support LBR */
> + goto cleanup;

Clean up inside create_perf_events() and just return here. Or at least
initialize skel to NULL above, otherwise __destroy() below will cause
SIGSEGV, most probably.

> + }
> +
> + skel = get_branch_snapshot__open_and_load();
> + if (!ASSERT_OK_PTR(skel, "get_branch_snapshot__open_and_load"))
> + goto cleanup;
> +
> + err = kallsyms_find("bpf_fexit_loop_test1", &skel->bss->address_low);
> + if (!ASSERT_OK(err, "kallsyms_find"))
> + goto cleanup;
> +
> + err = kallsyms_find_next("bpf_fexit_loop_test1", &skel->bss->address_high);
> + if (!ASSERT_OK(err, "kallsyms_find_next"))
> + goto cleanup;
> +
> + err = get_branch_snapshot__attach(skel);
> + if (!ASSERT_OK(err, "get_branch_snapshot__attach"))
> + goto cleanup;
> +
> + prog_fd = bpf_program__fd(skel->progs.test1);
> + err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
> + NULL, 0, &retval, NULL);
> +
> + if (!ASSERT_OK(err, "bpf_prog_test_run"))
> + goto cleanup;
> +
> + if (skel->bss->total_entries < 16) {
> + /* too few entries for the hit/waste test */
> + test__skip();
> + goto cleanup;
> + }
> +
> + ASSERT_GT(skel->bss->test1_hits, 5, "find_test1_in_lbr");
> +
> + /* Given we stop LBR in software, we will waste a few entries.
> + * But we should try to waste as few as possibleentries. We are at

s/possibleentries/possible entries/

> + * about 7 on x86_64 systems.
> + * Add a check for < 10 so that we get heads-up when something
> + * changes and wastes too many entries.
> + */
> + ASSERT_LT(skel->bss->wasted_entries, 10, "check_wasted_entries");
> +
> +cleanup:
> + get_branch_snapshot__destroy(skel);
> + close_perf_events();
> +}
> diff --git a/tools/testing/selftests/bpf/progs/get_branch_snapshot.c b/tools/testing/selftests/bpf/progs/get_branch_snapshot.c
> new file mode 100644
> index 0000000000000..9c944e7480b95
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/get_branch_snapshot.c
> @@ -0,0 +1,41 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2021 Facebook */
> +#include "vmlinux.h"
> +#include <bpf/bpf_helpers.h>
> +#include <bpf/bpf_tracing.h>
> +
> +char _license[] SEC("license") = "GPL";
> +
> +__u64 test1_hits = 0;
> +__u64 address_low = 0;
> +__u64 address_high = 0;
> +int wasted_entries = 0;
> +long total_entries = 0;
> +
> +#define MAX_LBR_ENTRIES 32

See my comment on another patch: if the kernel defines this constant as
an enum, we'll automatically get it from vmlinux.h.

> +
> +struct perf_branch_entry entries[MAX_LBR_ENTRIES] = {};
> +
> +
> +static inline bool in_range(__u64 val)
> +{
> + return (val >= address_low) && (val < address_high);
> +}
> +

[...]