RE: [RFC PATCH v4 11/15] perf stat: Handle taken alone in hardware-grouping

From: Wang, Weilin
Date: Tue Mar 26 2024 - 19:06:39 EST

> -----Original Message-----
> From: Ian Rogers <irogers@xxxxxxxxxx>
> Sent: Saturday, March 23, 2024 10:25 PM
> To: Wang, Weilin <weilin.wang@xxxxxxxxx>
> Cc: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>; Namhyung Kim
> <namhyung@xxxxxxxxxx>; Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>;
> Peter Zijlstra <peterz@xxxxxxxxxxxxx>; Ingo Molnar <mingo@xxxxxxxxxx>;
> Alexander Shishkin <alexander.shishkin@xxxxxxxxxxxxxxx>; Jiri Olsa
> <jolsa@xxxxxxxxxx>; Hunter, Adrian <adrian.hunter@xxxxxxxxx>; linux-perf-
> users@xxxxxxxxxxxxxxx; linux-kernel@xxxxxxxxxxxxxxx; Taylor, Perry
> <perry.taylor@xxxxxxxxx>; Alt, Samantha <samantha.alt@xxxxxxxxx>; Biggers,
> Caleb <caleb.biggers@xxxxxxxxx>; Mark Rutland <mark.rutland@xxxxxxx>
> Subject: Re: [RFC PATCH v4 11/15] perf stat: Handle taken alone in hardware-
> grouping
>
> On Thu, Feb 8, 2024 at 7:14 PM <weilin.wang@xxxxxxxxx> wrote:
> >
> > From: Weilin Wang <weilin.wang@xxxxxxxxx>
> >
> > Add taken alone into consideration when grouping. Only one taken
> > alone event is supported per group.
>
> Is there an example of this?

Yes, there are more than 20 taken-alone core events in SPR, for example
UOPS_RETIRED.MS, FRONTEND_RETIRED.DSB_MISS, and
INT_MISC.UNKNOWN_BRANCH_CYCLES.
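
These events are marked in the event JSON with a per-event "TakenAlone"
attribute, which the jevents.py change below picks up via jd.get('TakenAlone').
A minimal standalone sketch of that lookup (the EventName/Counter values here
are only illustrative, not copied from the SPR files):

import json

jd = json.loads("""
{
  "EventName": "UOPS_RETIRED.MS",
  "Counter": "0,1,2,3,4,5,6,7",
  "TakenAlone": "1"
}
""")

# Mirrors the patch: self.taken_alone = jd.get('TakenAlone')
# -> the attribute's string value when present, None when it is absent.
taken_alone = jd.get('TakenAlone')
print(taken_alone)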

If we run `perf stat -e` with two such events, we see multiplexing:
./perf stat -e "INT_MISC.UNKNOWN_BRANCH_CYCLES,UOPS_RETIRED.MS" -a sleep 1

Performance counter stats for 'system wide':

        3,257,425      INT_MISC.UNKNOWN_BRANCH_CYCLES     (50.04%)
      134,098,908      UOPS_RETIRED.MS                    (49.96%)

      1.008902540 seconds time elapsed

The multiplexing goes away if only one such event is collected at a time, e.g. replacing
INT_MISC.UNKNOWN_BRANCH_CYCLES with INT_MISC.CLEAR_RESTEER_CYCLES, which is not taken alone:
./perf stat -e "INT_MISC.CLEAR_RESTEER_CYCLES,UOPS_RETIRED.MS" -a sleep 1

Performance counter stats for 'system wide':

        5,107,299      INT_MISC.CLEAR_RESTEER_CYCLES
       21,929,170      UOPS_RETIRED.MS

      1.009937991 seconds time elapsed

Should I add this example to the commit message?

Thanks,
Weilin

>
> > Signed-off-by: Weilin Wang <weilin.wang@xxxxxxxxx>
> > ---
> > tools/perf/pmu-events/jevents.py | 7 ++++++-
> > tools/perf/pmu-events/pmu-events.h | 1 +
> > tools/perf/util/metricgroup.c | 20 +++++++++++++++-----
> > 3 files changed, 22 insertions(+), 6 deletions(-)
> >
> > diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-
> events/jevents.py
> > index bc91b7efa49a..4fbb367a3228 100755
> > --- a/tools/perf/pmu-events/jevents.py
> > +++ b/tools/perf/pmu-events/jevents.py
> > @@ -56,7 +56,9 @@ _json_event_attributes = [
> > # The list of counter(s) this event could use
> > 'counters',
> > # Longer things (the last won't be iterated over during decompress).
> > - 'long_desc'
> > + 'long_desc',
> > + # Taken alone event could not be collected in the same group with other
> taken alone event
> > + 'taken_alone'
> > ]
> >
> > # Attributes that are in pmu_unit_layout.
> > @@ -355,6 +357,9 @@ class JsonEvent:
> > self.num_counters = jd.get('NumCounters')
> > # Number of fixed counter
> > self.num_fixed_counters = jd.get('NumFixedCounters')
> > + # If the event is taken alone event, which cannot be grouped with any
> other
> > + # taken alone event.
> > + self.taken_alone = jd.get('TakenAlone')
> > filter = jd.get('Filter')
> > self.unit = jd.get('ScaleUnit')
> > self.perpkg = jd.get('PerPkg')
> > diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-
> events/pmu-events.h
> > index e245e4738970..837edfeb676a 100644
> > --- a/tools/perf/pmu-events/pmu-events.h
> > +++ b/tools/perf/pmu-events/pmu-events.h
> > @@ -54,6 +54,7 @@ struct pmu_event {
> > const char *unit;
> > bool perpkg;
> > bool deprecated;
> > + bool taken_alone;
> > };
> >
> > struct pmu_metric {
> > diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
> > index fe115f0880f9..95d3868819e3 100644
> > --- a/tools/perf/util/metricgroup.c
> > +++ b/tools/perf/util/metricgroup.c
> > @@ -189,6 +189,7 @@ struct metricgroup__event_info {
> > /** The event uses special counters that we consider that as free counter
> > * during the event grouping*/
> > bool free_counter;
> > + bool taken_alone;
> > /** The counters the event allowed to be collected on. */
> > DECLARE_BITMAP(counters, NR_COUNTERS);
> > };
> > @@ -235,6 +236,7 @@ struct metricgroup__group {
> > DECLARE_BITMAP(fixed_counters, NR_COUNTERS);
> > /** Head to the list of event names in this group*/
> > struct list_head event_head;
> > + bool taken_alone;
> > };
> >
> > struct metricgroup__group_events {
> > @@ -1717,6 +1719,7 @@ static void metricgroup__free_pmu_info(struct
> list_head *pmu_info_list)
> > static struct metricgroup__event_info *event_info__new(const char *name,
> > const char *pmu_name,
> > const char *counter,
> > + bool taken_alone,
> > bool free_counter)
> > {
> > int ret = 0;
> > @@ -1731,6 +1734,7 @@ static struct metricgroup__event_info
> *event_info__new(const char *name,
> > pmu_name = "core";
> >
> > e->name = name;
> > + e->taken_alone = taken_alone;
> > e->free_counter = free_counter;
> > e->pmu_name = pmu_name;
> > if (free_counter) {
> > @@ -1769,7 +1773,8 @@ static int
> metricgroup__add_metric_event_callback(const struct pmu_event *pe,
> > if (!strcasecmp(pe->name, d->event_name)) {
> > if (!pe->counters)
> > return -EINVAL;
> > - event = event_info__new(d->event_id, pe->pmu, pe->counters,
> /*free_counter=*/false);
> > + event = event_info__new(d->event_id, pe->pmu, pe->counters,
> > + pe->taken_alone, /*free_counter=*/false);
> > if (!event)
> > return -ENOMEM;
> > list_add(&event->nd, d->list);
> > @@ -1892,6 +1897,8 @@ static int find_and_set_counters(struct
> metricgroup__event_info *e,
> > int ret;
> > unsigned long find_bit = 0;
> >
> > + if (e->taken_alone && current_group->taken_alone)
> > + return -ENOSPC;
> > if (e->free_counter)
> > return 0;
> > if (e->fixed_counter) {
> > @@ -1926,11 +1933,13 @@ static int _insert_event(struct
> metricgroup__event_info *e,
> > list_add(&event->nd, &group->event_head);
> > else
> > list_add_tail(&event->nd, &group->event_head);
> > + if (e->taken_alone)
> > + group->taken_alone = true;
> > return 0;
> > }
> >
> > /**
> > - * Insert the new_group node at the end of the group list.
> > + * Initialize the new group and insert it to the end of the group list.
> > */
> > static int insert_new_group(struct list_head *head,
> > struct metricgroup__group *new_group,
> > @@ -1940,6 +1949,7 @@ static int insert_new_group(struct list_head
> *head,
> > INIT_LIST_HEAD(&new_group->event_head);
> > fill_counter_bitmap(new_group->gp_counters, 0, num_counters);
> > fill_counter_bitmap(new_group->fixed_counters, 0,
> num_fixed_counters);
> > + new_group->taken_alone = false;
> > list_add_tail(&new_group->nd, head);
> > return 0;
> > }
> > @@ -2143,8 +2153,8 @@ static int create_grouping(struct list_head
> *pmu_info_list,
> > //TODO: for each new core group, we should consider to add events that
> uses fixed counters
> > list_for_each_entry(e, event_info_list, nd) {
> > bitmap_scnprintf(e->counters, NR_COUNTERS, bit_buf,
> NR_COUNTERS);
> > - pr_debug("Event name %s, [pmu]=%s, [counters]=%s\n", e->name,
> > - e->pmu_name, bit_buf);
> > + pr_debug("Event name %s, [pmu]=%s, [counters]=%s,
> [taken_alone]=%d\n",
> > + e->name, e->pmu_name, bit_buf, e->taken_alone);
> > ret = assign_event_grouping(e, pmu_info_list, &groups);
> > if (ret)
> > goto out;
> > @@ -2191,7 +2201,7 @@ static int hw_aware_build_grouping(struct
> expr_parse_ctx *ctx __maybe_unused,
> > if (is_special_event(id)) {
> > struct metricgroup__event_info *event;
> >
> > - event = event_info__new(id, "default_core", "0",
> > + event = event_info__new(id, "default_core", "0", false,
>
> nit: document constant arguments, so "/*taken_alone=*/false,"
>
> Thanks,
> Ian
>
> > /*free_counter=*/true);
> > if (!event)
> > goto err_out;
> > --
> > 2.42.0
> >