[PATCH v2 3/3] perf report: Add weight[123] output fields

From: Namhyung Kim
Date: Thu Apr 11 2024 - 14:39:31 EST


Add weight1, weight2 and weight3 fields to -F/--fields and their aliases
like 'ins_lat', 'p_stage_cyc' and 'retire_lat'. Note that they are in
the sort keys too but the difference is that output fields will sum up
the weight values and display the average.

In the sort key, users can see the distribution of weight value and I
think it's confusing we have local vs. global weight for the same weight.

For example, I experiment with mem-loads events to get the weights. On
my laptop, it seems only weight1 field is supported.

$ perf mem record -- perf test -w noploop

Let's look at the noploop function only. It has 7 samples.

$ perf script -F event,ip,sym,weight | grep noploop
# event weight ip sym
cpu/mem-loads,ldlat=30/P: 43 55b3c122bffc noploop
cpu/mem-loads,ldlat=30/P: 48 55b3c122bffc noploop
cpu/mem-loads,ldlat=30/P: 38 55b3c122bffc noploop <--- same weight
cpu/mem-loads,ldlat=30/P: 38 55b3c122bffc noploop <--- same weight
cpu/mem-loads,ldlat=30/P: 59 55b3c122bffc noploop
cpu/mem-loads,ldlat=30/P: 33 55b3c122bffc noploop
cpu/mem-loads,ldlat=30/P: 38 55b3c122bffc noploop <--- same weight

When you use the 'weight' sort key, it'd show entries with a separate
weight value separately. Also note that the first entry has 3 samples
with weight value 38, so they are displayed together and the weight
value is the sum of 3 samples (114 = 38 * 3).

$ perf report -n -s +weight | grep -e Weight -e noploop
# Overhead Samples Command Shared Object Symbol Weight
0.53% 3 perf perf [.] noploop 114
0.18% 1 perf perf [.] noploop 59
0.18% 1 perf perf [.] noploop 48
0.18% 1 perf perf [.] noploop 43
0.18% 1 perf perf [.] noploop 33

If you use 'local_weight' sort key, you can see the actualy weight.

$ perf report -n -s +local_weight | grep -e Weight -e noploop
# Overhead Samples Command Shared Object Symbol Local Weight
0.53% 3 perf perf [.] noploop 38
0.18% 1 perf perf [.] noploop 59
0.18% 1 perf perf [.] noploop 48
0.18% 1 perf perf [.] noploop 43
0.18% 1 perf perf [.] noploop 33

But when you use the -F/--field option instead, you can see the average
weight for the while noploop function (as it won't group samples by
weight value and use the default 'comm,dso,sym' sort keys).

$ perf report -n -F +weight | grep -e Weight -e noploop
Warning:
--fields weight shows the average value unlike in the --sort key.
# Overhead Samples Weight1 Command Shared Object Symbol
1.23% 7 42.4 perf perf [.] noploop

The weight1 field shows the average value:
(38 * 3 + 59 + 48 + 43 + 33) / 7 = 42.4

Also it'd show the warning that 'weight' field has the average value.
Using 'weight1' can remove the warning.

Signed-off-by: Namhyung Kim <namhyung@xxxxxxxxxx>
---
v2) shows the warning when 'weight' field is used (Kan)

tools/perf/Documentation/perf-report.txt | 9 ++-
tools/perf/ui/hist.c | 92 +++++++++++++++++++-----
tools/perf/util/hist.h | 15 +++-
tools/perf/util/sort.c | 28 +++++---
tools/perf/util/sort.h | 2 +-
5 files changed, 115 insertions(+), 31 deletions(-)

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index d8b863e01fe0..d2b1593ef700 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -121,6 +121,9 @@ OPTIONS
- type: Data type of sample memory access.
- typeoff: Offset in the data type of sample memory access.
- symoff: Offset in the symbol.
+ - weight1: Average value of event specific weight (1st field of weight_struct).
+ - weight2: Average value of event specific weight (2nd field of weight_struct).
+ - weight3: Average value of event specific weight (3rd field of weight_struct).

By default, comm, dso and symbol keys are used.
(i.e. --sort comm,dso,symbol)
@@ -198,7 +201,11 @@ OPTIONS
--fields=::
Specify output field - multiple keys can be specified in CSV format.
Following fields are available:
- overhead, overhead_sys, overhead_us, overhead_children, sample and period.
+ overhead, overhead_sys, overhead_us, overhead_children, sample, period,
+ weight1, weight2, weight3, ins_lat, p_stage_cyc and retire_lat. The
+ last 3 names are alias for the corresponding weights. When the weight
+ fields are used, they will show the average value of the weight.
+
Also it can contain any sort key(s).

By default, every sort keys not specified in -F will be appended
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 2bf959d08354..685ba2a54fd8 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -25,7 +25,7 @@

static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
hpp_field_fn get_field, const char *fmt, int len,
- hpp_snprint_fn print_fn, bool fmt_percent)
+ hpp_snprint_fn print_fn, enum perf_hpp_fmt_type fmtype)
{
int ret;
struct hists *hists = he->hists;
@@ -33,7 +33,7 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
char *buf = hpp->buf;
size_t size = hpp->size;

- if (fmt_percent) {
+ if (fmtype == PERF_HPP_FMT_TYPE__PERCENT) {
double percent = 0.0;
u64 total = hists__total_period(hists);

@@ -41,8 +41,16 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
percent = 100.0 * get_field(he) / total;

ret = hpp__call_print_fn(hpp, print_fn, fmt, len, percent);
- } else
+ } else if (fmtype == PERF_HPP_FMT_TYPE__AVERAGE) {
+ double average = 0;
+
+ if (he->stat.nr_events)
+ average = 1.0 * get_field(he) / he->stat.nr_events;
+
+ ret = hpp__call_print_fn(hpp, print_fn, fmt, len, average);
+ } else {
ret = hpp__call_print_fn(hpp, print_fn, fmt, len, get_field(he));
+ }

if (evsel__is_group_event(evsel)) {
int prev_idx, idx_delta;
@@ -54,6 +62,7 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
list_for_each_entry(pair, &he->pairs.head, pairs.node) {
u64 period = get_field(pair);
u64 total = hists__total_period(pair->hists);
+ int nr_samples = pair->stat.nr_events;

if (!total)
continue;
@@ -66,7 +75,7 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
* zero-fill group members in the middle which
* have no sample
*/
- if (fmt_percent) {
+ if (fmtype != PERF_HPP_FMT_TYPE__RAW) {
ret += hpp__call_print_fn(hpp, print_fn,
fmt, len, 0.0);
} else {
@@ -75,9 +84,14 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
}
}

- if (fmt_percent) {
+ if (fmtype == PERF_HPP_FMT_TYPE__PERCENT) {
ret += hpp__call_print_fn(hpp, print_fn, fmt, len,
100.0 * period / total);
+ } else if (fmtype == PERF_HPP_FMT_TYPE__AVERAGE) {
+ double avg = nr_samples ? (period / nr_samples) : 0;
+
+ ret += hpp__call_print_fn(hpp, print_fn, fmt,
+ len, avg);
} else {
ret += hpp__call_print_fn(hpp, print_fn, fmt,
len, period);
@@ -92,7 +106,7 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
/*
* zero-fill group members at last which have no sample
*/
- if (fmt_percent) {
+ if (fmtype != PERF_HPP_FMT_TYPE__RAW) {
ret += hpp__call_print_fn(hpp, print_fn,
fmt, len, 0.0);
} else {
@@ -114,33 +128,35 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,

int hpp__fmt(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
struct hist_entry *he, hpp_field_fn get_field,
- const char *fmtstr, hpp_snprint_fn print_fn, bool fmt_percent)
+ const char *fmtstr, hpp_snprint_fn print_fn,
+ enum perf_hpp_fmt_type fmtype)
{
int len = fmt->user_len ?: fmt->len;

if (symbol_conf.field_sep) {
return __hpp__fmt(hpp, he, get_field, fmtstr, 1,
- print_fn, fmt_percent);
+ print_fn, fmtype);
}

- if (fmt_percent)
+ if (fmtype == PERF_HPP_FMT_TYPE__PERCENT)
len -= 2; /* 2 for a space and a % sign */
else
len -= 1;

- return __hpp__fmt(hpp, he, get_field, fmtstr, len, print_fn, fmt_percent);
+ return __hpp__fmt(hpp, he, get_field, fmtstr, len, print_fn, fmtype);
}

int hpp__fmt_acc(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
struct hist_entry *he, hpp_field_fn get_field,
- const char *fmtstr, hpp_snprint_fn print_fn, bool fmt_percent)
+ const char *fmtstr, hpp_snprint_fn print_fn,
+ enum perf_hpp_fmt_type fmtype)
{
if (!symbol_conf.cumulate_callchain) {
int len = fmt->user_len ?: fmt->len;
return snprintf(hpp->buf, hpp->size, " %*s", len - 1, "N/A");
}

- return hpp__fmt(fmt, hpp, he, get_field, fmtstr, print_fn, fmt_percent);
+ return hpp__fmt(fmt, hpp, he, get_field, fmtstr, print_fn, fmtype);
}

static int field_cmp(u64 field_a, u64 field_b)
@@ -350,7 +366,7 @@ static int hpp__color_##_type(struct perf_hpp_fmt *fmt, \
struct perf_hpp *hpp, struct hist_entry *he) \
{ \
return hpp__fmt(fmt, hpp, he, he_get_##_field, " %*.2f%%", \
- hpp_color_scnprintf, true); \
+ hpp_color_scnprintf, PERF_HPP_FMT_TYPE__PERCENT); \
}

#define __HPP_ENTRY_PERCENT_FN(_type, _field) \
@@ -358,7 +374,7 @@ static int hpp__entry_##_type(struct perf_hpp_fmt *fmt, \
struct perf_hpp *hpp, struct hist_entry *he) \
{ \
return hpp__fmt(fmt, hpp, he, he_get_##_field, " %*.2f%%", \
- hpp_entry_scnprintf, true); \
+ hpp_entry_scnprintf, PERF_HPP_FMT_TYPE__PERCENT); \
}

#define __HPP_SORT_FN(_type, _field) \
@@ -378,7 +394,7 @@ static int hpp__color_##_type(struct perf_hpp_fmt *fmt, \
struct perf_hpp *hpp, struct hist_entry *he) \
{ \
return hpp__fmt_acc(fmt, hpp, he, he_get_acc_##_field, " %*.2f%%", \
- hpp_color_scnprintf, true); \
+ hpp_color_scnprintf, PERF_HPP_FMT_TYPE__PERCENT); \
}

#define __HPP_ENTRY_ACC_PERCENT_FN(_type, _field) \
@@ -386,7 +402,7 @@ static int hpp__entry_##_type(struct perf_hpp_fmt *fmt, \
struct perf_hpp *hpp, struct hist_entry *he) \
{ \
return hpp__fmt_acc(fmt, hpp, he, he_get_acc_##_field, " %*.2f%%", \
- hpp_entry_scnprintf, true); \
+ hpp_entry_scnprintf, PERF_HPP_FMT_TYPE__PERCENT); \
}

#define __HPP_SORT_ACC_FN(_type, _field) \
@@ -406,7 +422,7 @@ static int hpp__entry_##_type(struct perf_hpp_fmt *fmt, \
struct perf_hpp *hpp, struct hist_entry *he) \
{ \
return hpp__fmt(fmt, hpp, he, he_get_raw_##_field, " %*"PRIu64, \
- hpp_entry_scnprintf, false); \
+ hpp_entry_scnprintf, PERF_HPP_FMT_TYPE__RAW); \
}

#define __HPP_SORT_RAW_FN(_type, _field) \
@@ -416,6 +432,26 @@ static int64_t hpp__sort_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \
return __hpp__sort(a, b, he_get_raw_##_field); \
}

+#define __HPP_ENTRY_AVERAGE_FN(_type, _field) \
+static u64 he_get_##_field(struct hist_entry *he) \
+{ \
+ return he->stat._field; \
+} \
+ \
+static int hpp__entry_##_type(struct perf_hpp_fmt *fmt, \
+ struct perf_hpp *hpp, struct hist_entry *he) \
+{ \
+ return hpp__fmt(fmt, hpp, he, he_get_##_field, " %*.1f", \
+ hpp_entry_scnprintf, PERF_HPP_FMT_TYPE__AVERAGE); \
+}
+
+#define __HPP_SORT_AVERAGE_FN(_type, _field) \
+static int64_t hpp__sort_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \
+ struct hist_entry *a, struct hist_entry *b) \
+{ \
+ return __hpp__sort(a, b, he_get_##_field); \
+}
+

#define HPP_PERCENT_FNS(_type, _field) \
__HPP_COLOR_PERCENT_FN(_type, _field) \
@@ -431,6 +467,10 @@ __HPP_SORT_ACC_FN(_type, _field)
__HPP_ENTRY_RAW_FN(_type, _field) \
__HPP_SORT_RAW_FN(_type, _field)

+#define HPP_AVERAGE_FNS(_type, _field) \
+__HPP_ENTRY_AVERAGE_FN(_type, _field) \
+__HPP_SORT_AVERAGE_FN(_type, _field)
+
HPP_PERCENT_FNS(overhead, period)
HPP_PERCENT_FNS(overhead_sys, period_sys)
HPP_PERCENT_FNS(overhead_us, period_us)
@@ -441,6 +481,10 @@ HPP_PERCENT_ACC_FNS(overhead_acc, period)
HPP_RAW_FNS(samples, nr_events)
HPP_RAW_FNS(period, period)

+HPP_AVERAGE_FNS(weight1, weight1)
+HPP_AVERAGE_FNS(weight2, weight2)
+HPP_AVERAGE_FNS(weight3, weight3)
+
static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
struct hist_entry *a __maybe_unused,
struct hist_entry *b __maybe_unused)
@@ -510,7 +554,10 @@ struct perf_hpp_fmt perf_hpp__format[] = {
HPP__COLOR_PRINT_FNS("guest usr", overhead_guest_us, OVERHEAD_GUEST_US),
HPP__COLOR_ACC_PRINT_FNS("Children", overhead_acc, OVERHEAD_ACC),
HPP__PRINT_FNS("Samples", samples, SAMPLES),
- HPP__PRINT_FNS("Period", period, PERIOD)
+ HPP__PRINT_FNS("Period", period, PERIOD),
+ HPP__PRINT_FNS("Weight1", weight1, WEIGHT1),
+ HPP__PRINT_FNS("Weight2", weight2, WEIGHT2),
+ HPP__PRINT_FNS("Weight3", weight3, WEIGHT3),
};

struct perf_hpp_list perf_hpp_list = {
@@ -526,6 +573,7 @@ struct perf_hpp_list perf_hpp_list = {
#undef HPP_PERCENT_FNS
#undef HPP_PERCENT_ACC_FNS
#undef HPP_RAW_FNS
+#undef HPP_AVERAGE_FNS

#undef __HPP_HEADER_FN
#undef __HPP_WIDTH_FN
@@ -534,9 +582,11 @@ struct perf_hpp_list perf_hpp_list = {
#undef __HPP_COLOR_ACC_PERCENT_FN
#undef __HPP_ENTRY_ACC_PERCENT_FN
#undef __HPP_ENTRY_RAW_FN
+#undef __HPP_ENTRY_AVERAGE_FN
#undef __HPP_SORT_FN
#undef __HPP_SORT_ACC_FN
#undef __HPP_SORT_RAW_FN
+#undef __HPP_SORT_AVERAGE_FN

static void fmt_free(struct perf_hpp_fmt *fmt)
{
@@ -785,6 +835,12 @@ void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists)
fmt->len = 12;
break;

+ case PERF_HPP__WEIGHT1:
+ case PERF_HPP__WEIGHT2:
+ case PERF_HPP__WEIGHT3:
+ fmt->len = 8;
+ break;
+
default:
break;
}
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index f34f101c36c2..5260822b9773 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -550,6 +550,9 @@ enum {
PERF_HPP__OVERHEAD_ACC,
PERF_HPP__SAMPLES,
PERF_HPP__PERIOD,
+ PERF_HPP__WEIGHT1,
+ PERF_HPP__WEIGHT2,
+ PERF_HPP__WEIGHT3,

PERF_HPP__MAX_INDEX
};
@@ -596,16 +599,24 @@ void perf_hpp__reset_sort_width(struct perf_hpp_fmt *fmt, struct hists *hists);
void perf_hpp__set_user_width(const char *width_list_str);
void hists__reset_column_width(struct hists *hists);

+enum perf_hpp_fmt_type {
+ PERF_HPP_FMT_TYPE__RAW,
+ PERF_HPP_FMT_TYPE__PERCENT,
+ PERF_HPP_FMT_TYPE__AVERAGE,
+};
+
typedef u64 (*hpp_field_fn)(struct hist_entry *he);
typedef int (*hpp_callback_fn)(struct perf_hpp *hpp, bool front);
typedef int (*hpp_snprint_fn)(struct perf_hpp *hpp, const char *fmt, ...);

int hpp__fmt(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
struct hist_entry *he, hpp_field_fn get_field,
- const char *fmtstr, hpp_snprint_fn print_fn, bool fmt_percent);
+ const char *fmtstr, hpp_snprint_fn print_fn,
+ enum perf_hpp_fmt_type fmtype);
int hpp__fmt_acc(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
struct hist_entry *he, hpp_field_fn get_field,
- const char *fmtstr, hpp_snprint_fn print_fn, bool fmt_percent);
+ const char *fmtstr, hpp_snprint_fn print_fn,
+ enum perf_hpp_fmt_type fmtype);

static inline void advance_hpp(struct perf_hpp *hpp, int inc)
{
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 92a1bd695e8a..add8601c57fd 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -2441,6 +2441,13 @@ static struct hpp_dimension hpp_sort_dimensions[] = {
DIM(PERF_HPP__OVERHEAD_ACC, "overhead_children"),
DIM(PERF_HPP__SAMPLES, "sample"),
DIM(PERF_HPP__PERIOD, "period"),
+ DIM(PERF_HPP__WEIGHT1, "weight1"),
+ DIM(PERF_HPP__WEIGHT2, "weight2"),
+ DIM(PERF_HPP__WEIGHT3, "weight3"),
+ /* aliases for weight_struct */
+ DIM(PERF_HPP__WEIGHT2, "ins_lat"),
+ DIM(PERF_HPP__WEIGHT3, "retire_lat"),
+ DIM(PERF_HPP__WEIGHT3, "p_stage_cyc"),
};

#undef DIM
@@ -3743,26 +3750,29 @@ void sort__setup_elide(FILE *output)
}
}

-int output_field_add(struct perf_hpp_list *list, char *tok)
+int output_field_add(struct perf_hpp_list *list, const char *tok)
{
unsigned int i;

- for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) {
- struct sort_dimension *sd = &common_sort_dimensions[i];
+ for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) {
+ struct hpp_dimension *hd = &hpp_sort_dimensions[i];

- if (!sd->name || strncasecmp(tok, sd->name, strlen(tok)))
+ if (strncasecmp(tok, hd->name, strlen(tok)))
continue;

- return __sort_dimension__add_output(list, sd);
+ if (!strcasecmp(tok, "weight"))
+ ui__warning("--fields weight shows the average value unlike in the --sort key.\n");
+
+ return __hpp_dimension__add_output(list, hd);
}

- for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) {
- struct hpp_dimension *hd = &hpp_sort_dimensions[i];
+ for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) {
+ struct sort_dimension *sd = &common_sort_dimensions[i];

- if (strncasecmp(tok, hd->name, strlen(tok)))
+ if (!sd->name || strncasecmp(tok, sd->name, strlen(tok)))
continue;

- return __hpp_dimension__add_output(list, hd);
+ return __sort_dimension__add_output(list, sd);
}

for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) {
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 690892a92cf3..0bd0ee3ae76b 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -141,7 +141,7 @@ void reset_dimensions(void);
int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
struct evlist *evlist,
int level);
-int output_field_add(struct perf_hpp_list *list, char *tok);
+int output_field_add(struct perf_hpp_list *list, const char *tok);
int64_t
sort__iaddr_cmp(struct hist_entry *left, struct hist_entry *right);
int64_t
--
2.44.0.683.g7961c838ac-goog