Re: [PATCH v6] perf utilities: cln_size header
From: Namhyung Kim
Date: Fri Apr 03 2026 - 20:29:36 EST
On Fri, Apr 03, 2026 at 10:08:06PM +0000, Ricky Ringler wrote:
> Store cacheline size during perf record in header, so
> that cacheline size can be used for other features, like
> sort.
>
> V6: Namhyung feedback and tests
> V5: Namhyung feedback
> V4: Ian feedback
> V3: Rebase off perf-tools-next round two
> V2: Rebase off perf-tools-next
>
> Follow-up patch from message ID "aYZiQk6Uftzlb_JV@x1"
>
> Testing:
> - Built perf
> - Ran record + report with feat enabled
> - Ran record + report with feat disabled
>
> Testing example with feat enabled:
> $ perf record ./Example
> $ perf report --header-only | grep -C 3 cacheline
> CPU_DOMAIN_INFO info available, use -I to display
> e_machine : 62
> e_flags : 0
> cacheline size: 64
> missing features: TRACING_DATA BUILD_ID BRANCH_STACK GROUP_DESC AUXTRACE \
> STAT CLOCKID DIR_FORMAT COMPRESSED CLOCK_DATA
> ========
>
> Signed-off-by: Ricky Ringler <ricky.ringler@xxxxxxxxx>
> ---
> tools/perf/builtin-inject.c | 1 +
> tools/perf/util/env.h | 1 +
> tools/perf/util/header.c | 33 +++++++++++++++++++++++++++++
> tools/perf/util/header.h | 1 +
> tools/perf/util/sort.c | 41 +++++++++++++++++++++++++++----------
> 5 files changed, 66 insertions(+), 11 deletions(-)
>
> diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
> index 5b29f4296861..11ac7c8c4be3 100644
> --- a/tools/perf/builtin-inject.c
> +++ b/tools/perf/builtin-inject.c
> @@ -2134,6 +2134,7 @@ static bool keep_feat(struct perf_inject *inject, int feat)
> case HEADER_HYBRID_TOPOLOGY:
> case HEADER_PMU_CAPS:
> case HEADER_CPU_DOMAIN_INFO:
> + case HEADER_CLN_SIZE:
> return true;
> /* Information that can be updated */
> case HEADER_BUILD_ID:
> diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
> index a4501cbca375..c7052ac1f856 100644
> --- a/tools/perf/util/env.h
> +++ b/tools/perf/util/env.h
> @@ -112,6 +112,7 @@ struct perf_env {
> struct cpu_cache_level *caches;
> struct cpu_domain_map **cpu_domain;
> int caches_cnt;
> + unsigned int cln_size;
> u32 comp_ratio;
> u32 comp_ver;
> u32 comp_type;
> diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
> index 9142a8ba4019..4d852bd4ca9a 100644
> --- a/tools/perf/util/header.c
> +++ b/tools/perf/util/header.c
> @@ -54,6 +54,7 @@
> #include "bpf-event.h"
> #include "bpf-utils.h"
> #include "clockid.h"
> +#include "cacheline.h"
>
> #include <linux/ctype.h>
> #include <internal/lib.h>
> @@ -1304,6 +1305,22 @@ static int write_cache(struct feat_fd *ff,
> return ret;
> }
>
> +#define DEFAULT_CACHELINE_SIZE = 64
I'm curious how your compiler could build this.
> +
> +static int write_cln_size(struct feat_fd *ff,
> + struct evlist *evlist __maybe_unused)
> +{
> + int cln_size = cacheline_size();
> +
> +
> + if(!cln_size)
> + cln_size = DEFAULT_CACHELINE_SIZE;
It should have a syntax error, right?
> +
> + ff->ph->env.cln_size = cln_size;
> +
> + return do_write(ff, &cln_size, sizeof(cln_size));
> +}
> +
> static int write_stat(struct feat_fd *ff __maybe_unused,
> struct evlist *evlist __maybe_unused)
> {
[SNIP]
> @@ -2474,7 +2475,30 @@ struct sort_entry sort_type_offset = {
>
> /* --sort typecln */
>
> -#define DEFAULT_CACHELINE_SIZE 64
> +static int
> +hist_entry__cln_size(struct hist_entry *he)
> +{
> + int ret = 0;
> +
> + if (he && he->hists) {
> + struct evsel *evsel = hists_to_evsel(he->hists);
> +
> +
> + if (evsel) {
> + struct perf_session *session = evsel__session(evsel);
> +
> + ret = session->header.env.cln_size;
> + }
> + }
> +
> + if (!ret || ret < 1) {
> + int default_cacheline_size = 64; // avoid div/0 later
As I said in the previous comment, please use DEFAULT_CALLCHAIN_SIZE
here.
Thanks,
Namhyung
> +
> + ret = default_cacheline_size;
> + }
> +
> + return ret;
> +}
>
> static int64_t
> sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
> @@ -2482,11 +2506,9 @@ sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
> struct annotated_data_type *left_type = left->mem_type;
> struct annotated_data_type *right_type = right->mem_type;
> int64_t left_cln, right_cln;
> + int64_t cln_size_left = hist_entry__cln_size(left);
> + int64_t cln_size_right = hist_entry__cln_size(right);
> int64_t ret;
> - int cln_size = cacheline_size();
> -
> - if (cln_size == 0)
> - cln_size = DEFAULT_CACHELINE_SIZE;
>
> if (!left_type) {
> sort__type_init(left);
> @@ -2502,8 +2524,8 @@ sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
> if (ret)
> return ret;
>
> - left_cln = left->mem_type_off / cln_size;
> - right_cln = right->mem_type_off / cln_size;
> + left_cln = left->mem_type_off / cln_size_left;
> + right_cln = right->mem_type_off / cln_size_right;
> return left_cln - right_cln;
> }
>
> @@ -2511,10 +2533,7 @@ static int hist_entry__typecln_snprintf(struct hist_entry *he, char *bf,
> size_t size, unsigned int width __maybe_unused)
> {
> struct annotated_data_type *he_type = he->mem_type;
> - int cln_size = cacheline_size();
> -
> - if (cln_size == 0)
> - cln_size = DEFAULT_CACHELINE_SIZE;
> + int cln_size = hist_entry__cln_size(he);
>
> return repsep_snprintf(bf, size, "%s: cache-line %d", he_type->self.type_name,
> he->mem_type_off / cln_size);
> --
> 2.53.0
>
>