Re: [PATCH V5 8/8] perf tools: Add branch counter knob

From: Ian Rogers
Date: Wed Oct 25 2023 - 22:13:18 EST


On Wed, Oct 25, 2023 at 1:16 PM <kan.liang@xxxxxxxxxxxxxxx> wrote:
>
> From: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>
>
> Add a new branch filter, "counter", for the branch counter option. It is
> used to mark the events which should be logged in the branch. If it is
> applied with the -j option, the counters of all the events should be
> logged in the branch. If the legacy kernel doesn't support the new
> branch sample type, switching off the branch counter filter.
>
> The stored counter values in each branch are displayed right after the
> regular branch stack information via perf report -D.
>
> Usage examples:
>
> perf record -e "{branch-instructions,branch-misses}:S" -j any,counter
>
> Only the first event, branch-instructions, collect the LBR. Both
> branch-instructions and branch-misses are marked as logged events.
> The occurrences information of them can be found in the branch stack
> extension space of each branch.
>
> perf record -e "{cpu/branch-instructions,branch_type=any/,
> cpu/branch-misses,branch_type=counter/}"
>
> Only the first event, branch-instructions, collect the LBR. Only the
> branch-misses event is marked as a logged event.
>
> Signed-off-by: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>

Reviewed-by: Ian Rogers <irogers@xxxxxxxxxx>

Perhaps add a test somewhere like:
https://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/tests/shell/record.sh
for coverage when hardware supports the feature.

Thanks,
Ian

> ---
>
> No changes since V4
>
> tools/perf/Documentation/perf-record.txt | 4 +++
> tools/perf/util/evsel.c | 31 ++++++++++++++++++++++-
> tools/perf/util/evsel.h | 1 +
> tools/perf/util/parse-branch-options.c | 1 +
> tools/perf/util/perf_event_attr_fprintf.c | 1 +
> tools/perf/util/sample.h | 1 +
> tools/perf/util/session.c | 15 +++++++++--
> 7 files changed, 51 insertions(+), 3 deletions(-)
>
> diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
> index d5217be012d7..b6afe7cc948d 100644
> --- a/tools/perf/Documentation/perf-record.txt
> +++ b/tools/perf/Documentation/perf-record.txt
> @@ -442,6 +442,10 @@ following filters are defined:
> 4th-Gen Xeon+ server), the save branch type is unconditionally enabled
> when the taken branch stack sampling is enabled.
> - priv: save privilege state during sampling in case binary is not available later
> + - counter: save occurrences of the event since the last branch entry. Currently, the
> + feature is only supported by a newer CPU, e.g., Intel Sierra Forest and
> + later platforms. An error out is expected if it's used on the unsupported
> + kernel or CPUs.
>
> +
> The option requires at least one branch type among any, any_call, any_ret, ind_call, cond.
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index a8a5ff87cc1f..58a9b8c82790 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -1831,6 +1831,8 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
>
> static void evsel__disable_missing_features(struct evsel *evsel)
> {
> + if (perf_missing_features.branch_counters)
> + evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_COUNTERS;
> if (perf_missing_features.read_lost)
> evsel->core.attr.read_format &= ~PERF_FORMAT_LOST;
> if (perf_missing_features.weight_struct) {
> @@ -1884,7 +1886,12 @@ bool evsel__detect_missing_features(struct evsel *evsel)
> * Must probe features in the order they were added to the
> * perf_event_attr interface.
> */
> - if (!perf_missing_features.read_lost &&
> + if (!perf_missing_features.branch_counters &&
> + (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS)) {
> + perf_missing_features.branch_counters = true;
> + pr_debug2("switching off branch counters support\n");
> + return true;
> + } else if (!perf_missing_features.read_lost &&
> (evsel->core.attr.read_format & PERF_FORMAT_LOST)) {
> perf_missing_features.read_lost = true;
> pr_debug2("switching off PERF_FORMAT_LOST support\n");
> @@ -2344,6 +2351,18 @@ u64 evsel__bitfield_swap_branch_flags(u64 value)
> return new_val;
> }
>
> +static inline bool evsel__has_branch_counters(const struct evsel *evsel)
> +{
> + struct evsel *cur, *leader = evsel__leader(evsel);
> +
> + evlist__for_each_entry(evsel->evlist, cur) {
> + if ((leader == evsel__leader(cur)) &&
> + (cur->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS))
> + return true;
> + }
> + return false;
> +}
> +
> int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
> struct perf_sample *data)
> {
> @@ -2577,6 +2596,16 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
>
> OVERFLOW_CHECK(array, sz, max_size);
> array = (void *)array + sz;
> +
> + if (evsel__has_branch_counters(evsel)) {
> + OVERFLOW_CHECK_u64(array);
> +
> + data->branch_stack_cntr = (u64 *)array;
> + sz = data->branch_stack->nr * sizeof(u64);
> +
> + OVERFLOW_CHECK(array, sz, max_size);
> + array = (void *)array + sz;
> + }
> }
>
> if (type & PERF_SAMPLE_REGS_USER) {
> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> index 848534ec74fa..85f24c986392 100644
> --- a/tools/perf/util/evsel.h
> +++ b/tools/perf/util/evsel.h
> @@ -191,6 +191,7 @@ struct perf_missing_features {
> bool code_page_size;
> bool weight_struct;
> bool read_lost;
> + bool branch_counters;
> };
>
> extern struct perf_missing_features perf_missing_features;
> diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c
> index fd67d204d720..f7f7aff3d85a 100644
> --- a/tools/perf/util/parse-branch-options.c
> +++ b/tools/perf/util/parse-branch-options.c
> @@ -36,6 +36,7 @@ static const struct branch_mode branch_modes[] = {
> BRANCH_OPT("stack", PERF_SAMPLE_BRANCH_CALL_STACK),
> BRANCH_OPT("hw_index", PERF_SAMPLE_BRANCH_HW_INDEX),
> BRANCH_OPT("priv", PERF_SAMPLE_BRANCH_PRIV_SAVE),
> + BRANCH_OPT("counter", PERF_SAMPLE_BRANCH_COUNTERS),
> BRANCH_END
> };
>
> diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
> index 2247991451f3..8f04d3b7f3ec 100644
> --- a/tools/perf/util/perf_event_attr_fprintf.c
> +++ b/tools/perf/util/perf_event_attr_fprintf.c
> @@ -55,6 +55,7 @@ static void __p_branch_sample_type(char *buf, size_t size, u64 value)
> bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP),
> bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES),
> bit_name(TYPE_SAVE), bit_name(HW_INDEX), bit_name(PRIV_SAVE),
> + bit_name(COUNTERS),
> { .name = NULL, }
> };
> #undef bit_name
> diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h
> index c92ad0f51ecd..70b2c3135555 100644
> --- a/tools/perf/util/sample.h
> +++ b/tools/perf/util/sample.h
> @@ -113,6 +113,7 @@ struct perf_sample {
> void *raw_data;
> struct ip_callchain *callchain;
> struct branch_stack *branch_stack;
> + u64 *branch_stack_cntr;
> struct regs_dump user_regs;
> struct regs_dump intr_regs;
> struct stack_dump user_stack;
> diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
> index 1e9aa8ed15b6..4a094ab0362b 100644
> --- a/tools/perf/util/session.c
> +++ b/tools/perf/util/session.c
> @@ -1150,9 +1150,13 @@ static void callchain__printf(struct evsel *evsel,
> i, callchain->ips[i]);
> }
>
> -static void branch_stack__printf(struct perf_sample *sample, bool callstack)
> +static void branch_stack__printf(struct perf_sample *sample,
> + struct evsel *evsel)
> {
> struct branch_entry *entries = perf_sample__branch_entries(sample);
> + bool callstack = evsel__has_branch_callstack(evsel);
> + u64 *branch_stack_cntr = sample->branch_stack_cntr;
> + struct perf_env *env = evsel__env(evsel);
> uint64_t i;
>
> if (!callstack) {
> @@ -1194,6 +1198,13 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack)
> }
> }
> }
> +
> + if (branch_stack_cntr) {
> + printf("... branch stack counters: nr:%" PRIu64 " (counter width: %u max counter nr:%u)\n",
> + sample->branch_stack->nr, env->br_cntr_width, env->br_cntr_nr);
> + for (i = 0; i < sample->branch_stack->nr; i++)
> + printf("..... %2"PRIu64": %016" PRIx64 "\n", i, branch_stack_cntr[i]);
> + }
> }
>
> static void regs_dump__printf(u64 mask, u64 *regs, const char *arch)
> @@ -1355,7 +1366,7 @@ static void dump_sample(struct evsel *evsel, union perf_event *event,
> callchain__printf(evsel, sample);
>
> if (evsel__has_br_stack(evsel))
> - branch_stack__printf(sample, evsel__has_branch_callstack(evsel));
> + branch_stack__printf(sample, evsel);
>
> if (sample_type & PERF_SAMPLE_REGS_USER)
> regs_user__printf(sample, arch);
> --
> 2.35.1
>