Re: [PATCH bpf-next v2] selftests/bpf: Run test_perf_branches more than 1 time

From: Mykyta Yatsenko

Date: Fri Jun 05 2026 - 10:10:55 EST

On 6/5/26 2:06 PM, Maya Matuszczyk wrote:
> test_perf_branches bpf program was only being ran once, and
> perf_branches test would fail if the only sample that was passed to
> test_perf_branches didn't have branch data.
> Fix those failures by running it a few more times and waiting until a valid
> sample shows up, and leave the perf event only enabled for the duration of
> test spins.
>
> Signed-off-by: Maya Matuszczyk <mayamatuszczyk@xxxxxxxxxx>
> ---

Change looks good.
minor nit: Maybe it makes sense to migrate all CHECKs in this test to ASSERT,
though.

Acked-by: Mykyta Yatsenko <yatsenko@xxxxxxxx>

> Changes from v1:
> - Switched from CHECK to ASSERT_OK (Alexei Starovoitov)
> - bpf -> bpf-next
>
> v1: https://lore.kernel.org/bpf/20260408095710.4090495-1-mayamatuszczyk@xxxxxxxxxx/
> ---
> .../selftests/bpf/prog_tests/perf_branches.c | 57 +++++++++++++------
> .../selftests/bpf/progs/test_perf_branches.c | 11 ++--
> 2 files changed, 43 insertions(+), 25 deletions(-)
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/perf_branches.c b/tools/testing/selftests/bpf/prog_tests/perf_branches.c
> index 0a7ef770c487..153afcdc082e 100644
> --- a/tools/testing/selftests/bpf/prog_tests/perf_branches.c
> +++ b/tools/testing/selftests/bpf/prog_tests/perf_branches.c
> @@ -15,10 +15,6 @@ static void check_good_sample(struct test_perf_branches *skel)
> int pbe_size = sizeof(struct perf_branch_entry);
> int duration = 0;
>
> - if (CHECK(!skel->bss->run_cnt, "invalid run_cnt",
> - "checked sample validity before prog run"))
> - return;
> -
> if (CHECK(!skel->bss->valid, "output not valid",
> "no valid sample from prog"))
> return;
> @@ -49,10 +45,6 @@ static void check_bad_sample(struct test_perf_branches *skel)
> int written_stack = skel->bss->written_stack_out;
> int duration = 0;
>
> - if (CHECK(!skel->bss->run_cnt, "invalid run_cnt",
> - "checked sample validity before prog run"))
> - return;
> -
> if (CHECK(!skel->bss->valid, "output not valid",
> "no valid sample from prog"))
> return;
> @@ -73,7 +65,6 @@ static void test_perf_branches_common(int perf_fd,
> bool detached = false;
> struct bpf_link *link;
> volatile int j = 0;
> - cpu_set_t cpu_set;
>
> skel = test_perf_branches__open_and_load();
> if (CHECK(!skel, "test_perf_branches_load",
> @@ -85,19 +76,35 @@ static void test_perf_branches_common(int perf_fd,
> if (!ASSERT_OK_PTR(link, "attach_perf_event"))
> goto out_destroy_skel;
>
> - /* generate some branches on cpu 0 */
> - CPU_ZERO(&cpu_set);
> - CPU_SET(0, &cpu_set);
> - err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
> - if (CHECK(err, "set_affinity", "cpu #0, err %d\n", err))
> + err = ioctl(perf_fd, PERF_EVENT_IOC_RESET, 0);
> + if (!ASSERT_OK(err, "ioctl(PERF_EVENT_IOC_RESET)"))
> goto out_destroy;
>
> - /* Spin the loop for a while by using a high iteration count, and by
> - * checking whether the specific run count marker has been explicitly
> - * incremented at least once by the backing perf_event BPF program.
> + err = ioctl(perf_fd, PERF_EVENT_IOC_REFRESH, 10);
> + if (!ASSERT_OK(err, "ioctl(PERF_EVENT_IOC_REFRESH)"))
> + goto out_destroy;
> +
> + /* Make up a bunch of branch events until we time out or bpf prog sets a flag
> + * that it caught a valid event. To be completely sure that required_size_out,
> + * written_stack_out or written_global_out writes and reads won't get
> + * reordered by anyone in a way that would fail this test, use atomic
> + * operations
> */
> - for (i = 0; i < 100000000 && !*(volatile int *)&skel->bss->run_cnt; ++i)
> + for (i = 0; i < 100000000; ++i) {
> + /* This technically should be an atomic load with acquire
> + * ordering, but we don't want to clog up memory bus while
> + * spinning
> + */
> + if (READ_ONCE(skel->bss->valid) > 0)
> + break;
> ++j;
> + }
> +
> + __atomic_thread_fence(__ATOMIC_ACQUIRE);
> +
> + err = ioctl(perf_fd, PERF_EVENT_IOC_DISABLE, 0);
> + if (!ASSERT_OK(err, "ioctl(PERF_EVENT_IOC_DISABLE)"))
> + goto out_destroy;
>
> test_perf_branches__detach(skel);
> detached = true;
> @@ -121,6 +128,7 @@ static void test_perf_branches_hw(void)
> attr.size = sizeof(attr);
> attr.type = PERF_TYPE_HARDWARE;
> attr.config = PERF_COUNT_HW_CPU_CYCLES;
> + attr.disabled = 1;
> attr.freq = 1;
> attr.sample_freq = 1000;
> attr.sample_type = PERF_SAMPLE_BRANCH_STACK;
> @@ -162,6 +170,7 @@ static void test_perf_branches_no_hw(void)
> attr.size = sizeof(attr);
> attr.type = PERF_TYPE_SOFTWARE;
> attr.config = PERF_COUNT_SW_CPU_CLOCK;
> + attr.disabled = 1;
> attr.freq = 1;
> attr.sample_freq = 1000;
> pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
> @@ -175,6 +184,18 @@ static void test_perf_branches_no_hw(void)
>
> void test_perf_branches(void)
> {
> + int err, duration = 0;
> + cpu_set_t cpu_set;
> +
> + /* Pin ourselves to cpu0 so that all branches we generate would be
> + * also on cpu0
> + */
> + CPU_ZERO(&cpu_set);
> + CPU_SET(0, &cpu_set);
> + err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
> + if (!ASSERT_OK(err, "pthread_set_affinity"))
> + return;
> +
> if (test__start_subtest("perf_branches_hw"))
> test_perf_branches_hw();
> if (test__start_subtest("perf_branches_no_hw"))
> diff --git a/tools/testing/selftests/bpf/progs/test_perf_branches.c b/tools/testing/selftests/bpf/progs/test_perf_branches.c
> index 05ac9410cd68..9806414cebc4 100644
> --- a/tools/testing/selftests/bpf/progs/test_perf_branches.c
> +++ b/tools/testing/selftests/bpf/progs/test_perf_branches.c
> @@ -8,7 +8,6 @@
> #include <bpf/bpf_tracing.h>
>
> int valid = 0;
> -int run_cnt = 0;
> int required_size_out = 0;
> int written_stack_out = 0;
> int written_global_out = 0;
> @@ -25,13 +24,11 @@ int perf_branches(void *ctx)
> __u64 entries[4 * 3] = {0};
> int required_size, written_stack, written_global;
>
> - ++run_cnt;
> -
> /* write to stack */
> written_stack = bpf_read_branch_records(ctx, entries, sizeof(entries), 0);
> /* ignore spurious events */
> if (!written_stack)
> - return 1;
> + return 0;
>
> /* get required size */
> required_size = bpf_read_branch_records(ctx, NULL, 0,
> @@ -40,14 +37,14 @@ int perf_branches(void *ctx)
> written_global = bpf_read_branch_records(ctx, fpbe, sizeof(fpbe), 0);
> /* ignore spurious events */
> if (!written_global)
> - return 1;
> + return 0;
>
> required_size_out = required_size;
> written_stack_out = written_stack;
> written_global_out = written_global;
> - valid = 1;
> + __atomic_add_fetch(&valid, 1, __ATOMIC_RELEASE);
>
> - return 0;
> + return 1;
> }
>
> char _license[] SEC("license") = "GPL";