Re: [PATCH v2 04/18] perf jevents: Group events by PMU

From: Ian Rogers
Date: Tue Aug 29 2023 - 11:35:22 EST


On Tue, Aug 29, 2023 at 8:28 AM James Clark <james.clark@xxxxxxx> wrote:
>
>
>
> On 24/08/2023 05:13, Ian Rogers wrote:
> > Prior to this change a cpuid would map to a list of events where the
> > PMU would be encoded alongside the event information. This change
> > breaks apart each group of events so that there is a group per PMU. A
> > new table is added with the PMU's name and the list of events, the
> > original table now holding an array of these per PMU tables.
> >
> > These changes are to make it easier to get per PMU information about
> > events, rather than the current approach of scanning all events. The
> > perf binary size with BPF skeletons on x86 is reduced by about 1%. The
> > unidentified PMU is now always expanded to "cpu".
> >
>
> Hi Ian,
>
> I noticed that after this commit in perf-tools-next, events and
> descriptions aren't printed by perf list anymore (on Arm). Only the
> metrics and builtin kernel events are printed.
>
> I'm not sure if this is something that's related to test failure that
> Arnaldo mentioned on the cover letter, or something else.
>
> I didn't look into the root cause yet, just thought I would mention it
> first.


Hi James,

There was a glitch in this patch assuming json events with no PMU were
defaulting to CPU, something that happens a lot in the perf code. The
later patch:
https://lore.kernel.org/lkml/20230826062203.1058041-1-irogers@xxxxxxxxxx/
is supposed to address this.

Thanks,
Ian

> Thanks
> James
>
> > Signed-off-by: Ian Rogers <irogers@xxxxxxxxxx>
> > ---
> > tools/perf/pmu-events/jevents.py | 181 +++++++++++++++++++++++--------
> > tools/perf/tests/pmu-events.c | 30 +++--
> > 2 files changed, 154 insertions(+), 57 deletions(-)
> >
> > diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py
> > index aae5334099b1..1ad20140114c 100755
> > --- a/tools/perf/pmu-events/jevents.py
> > +++ b/tools/perf/pmu-events/jevents.py
> > @@ -42,7 +42,7 @@ _metricgroups = {}
> > # Order specific JsonEvent attributes will be visited.
> > _json_event_attributes = [
> > # cmp_sevent related attributes.
> > - 'name', 'pmu', 'topic', 'desc',
> > + 'name', 'topic', 'desc',
> > # Seems useful, put it early.
> > 'event',
> > # Short things in alphabetical order.
> > @@ -53,7 +53,7 @@ _json_event_attributes = [
> >
> > # Attributes that are in pmu_metric rather than pmu_event.
> > _json_metric_attributes = [
> > - 'pmu', 'metric_name', 'metric_group', 'metric_expr', 'metric_threshold',
> > + 'metric_name', 'metric_group', 'metric_expr', 'metric_threshold',
> > 'desc', 'long_desc', 'unit', 'compat', 'metricgroup_no_group',
> > 'default_metricgroup_name', 'aggr_mode', 'event_grouping'
> > ]
> > @@ -252,7 +252,7 @@ class JsonEvent:
> > def unit_to_pmu(unit: str) -> Optional[str]:
> > """Convert a JSON Unit to Linux PMU name."""
> > if not unit:
> > - return None
> > + return 'cpu'
> > # Comment brought over from jevents.c:
> > # it's not realistic to keep adding these, we need something more scalable ...
> > table = {
> > @@ -343,10 +343,13 @@ class JsonEvent:
> > self.desc += extra_desc
> > if self.long_desc and extra_desc:
> > self.long_desc += extra_desc
> > - if self.pmu:
> > - if self.desc and not self.desc.endswith('. '):
> > - self.desc += '. '
> > - self.desc = (self.desc if self.desc else '') + ('Unit: ' + self.pmu + ' ')
> > + if self.pmu and self.pmu != 'cpu':
> > + if not self.desc:
> > + self.desc = 'Unit: ' + self.pmu
> > + else:
> > + if not self.desc.endswith('. '):
> > + self.desc += '. '
> > + self.desc += 'Unit: ' + self.pmu
> > if arch_std:
> > if arch_std.lower() in _arch_std_events:
> > event = _arch_std_events[arch_std.lower()].event
> > @@ -437,13 +440,13 @@ def add_events_table_entries(item: os.DirEntry, topic: str) -> None:
> > def print_pending_events() -> None:
> > """Optionally close events table."""
> >
> > - def event_cmp_key(j: JsonEvent) -> Tuple[bool, str, str, str, str]:
> > + def event_cmp_key(j: JsonEvent) -> Tuple[str, str, bool, str, str]:
> > def fix_none(s: Optional[str]) -> str:
> > if s is None:
> > return ''
> > return s
> >
> > - return (j.desc is not None, fix_none(j.topic), fix_none(j.name), fix_none(j.pmu),
> > + return (fix_none(j.pmu).replace(',','_'), fix_none(j.name), j.desc is not None, fix_none(j.topic),
> > fix_none(j.metric_name))
> >
> > global _pending_events
> > @@ -458,13 +461,36 @@ def print_pending_events() -> None:
> > global event_tables
> > _event_tables.append(_pending_events_tblname)
> >
> > - _args.output_file.write(
> > - f'static const struct compact_pmu_event {_pending_events_tblname}[] = {{\n')
> > -
> > + first = True
> > + last_pmu = None
> > + pmus = set()
> > for event in sorted(_pending_events, key=event_cmp_key):
> > + if event.pmu != last_pmu:
> > + if not first:
> > + _args.output_file.write('};\n')
> > + pmu_name = event.pmu.replace(',', '_')
> > + _args.output_file.write(
> > + f'static const struct compact_pmu_event {_pending_events_tblname}_{pmu_name}[] = {{\n')
> > + first = False
> > + last_pmu = event.pmu
> > + pmus.add((event.pmu, pmu_name))
> > +
> > _args.output_file.write(event.to_c_string(metric=False))
> > _pending_events = []
> >
> > + _args.output_file.write(f"""
> > +}};
> > +
> > +const struct pmu_table_entry {_pending_events_tblname}[] = {{
> > +""")
> > + for (pmu, tbl_pmu) in sorted(pmus):
> > + pmu_name = f"{pmu}\\000"
> > + _args.output_file.write(f"""{{
> > + .entries = {_pending_events_tblname}_{tbl_pmu},
> > + .num_entries = ARRAY_SIZE({_pending_events_tblname}_{tbl_pmu}),
> > + .pmu_name = {{ {_bcs.offsets[pmu_name]} /* {pmu_name} */ }},
> > +}},
> > +""")
> > _args.output_file.write('};\n\n')
> >
> > def print_pending_metrics() -> None:
> > @@ -490,13 +516,36 @@ def print_pending_metrics() -> None:
> > global metric_tables
> > _metric_tables.append(_pending_metrics_tblname)
> >
> > - _args.output_file.write(
> > - f'static const struct compact_pmu_event {_pending_metrics_tblname}[] = {{\n')
> > -
> > + first = True
> > + last_pmu = None
> > + pmus = set()
> > for metric in sorted(_pending_metrics, key=metric_cmp_key):
> > + if metric.pmu != last_pmu:
> > + if not first:
> > + _args.output_file.write('};\n')
> > + pmu_name = metric.pmu.replace(',', '_')
> > + _args.output_file.write(
> > + f'static const struct compact_pmu_event {_pending_metrics_tblname}_{pmu_name}[] = {{\n')
> > + first = False
> > + last_pmu = metric.pmu
> > + pmus.add((metric.pmu, pmu_name))
> > +
> > _args.output_file.write(metric.to_c_string(metric=True))
> > _pending_metrics = []
> >
> > + _args.output_file.write(f"""
> > +}};
> > +
> > +const struct pmu_table_entry {_pending_metrics_tblname}[] = {{
> > +""")
> > + for (pmu, tbl_pmu) in sorted(pmus):
> > + pmu_name = f"{pmu}\\000"
> > + _args.output_file.write(f"""{{
> > + .entries = {_pending_metrics_tblname}_{tbl_pmu},
> > + .num_entries = ARRAY_SIZE({_pending_metrics_tblname}_{tbl_pmu}),
> > + .pmu_name = {{ {_bcs.offsets[pmu_name]} /* {pmu_name} */ }},
> > +}},
> > +""")
> > _args.output_file.write('};\n\n')
> >
> > def get_topic(topic: str) -> str:
> > @@ -532,6 +581,8 @@ def preprocess_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
> >
> > topic = get_topic(item.name)
> > for event in read_json_events(item.path, topic):
> > + pmu_name = f"{event.pmu}\\000"
> > + _bcs.add(pmu_name)
> > if event.name:
> > _bcs.add(event.build_c_string(metric=False))
> > if event.metric_name:
> > @@ -577,14 +628,14 @@ def print_mapping_table(archs: Sequence[str]) -> None:
> > _args.output_file.write("""
> > /* Struct used to make the PMU event table implementation opaque to callers. */
> > struct pmu_events_table {
> > - const struct compact_pmu_event *entries;
> > - size_t length;
> > + const struct pmu_table_entry *pmus;
> > + uint32_t num_pmus;
> > };
> >
> > /* Struct used to make the PMU metric table implementation opaque to callers. */
> > struct pmu_metrics_table {
> > - const struct compact_pmu_event *entries;
> > - size_t length;
> > + const struct pmu_table_entry *pmus;
> > + uint32_t num_pmus;
> > };
> >
> > /*
> > @@ -614,12 +665,12 @@ const struct pmu_events_map pmu_events_map[] = {
> > \t.arch = "testarch",
> > \t.cpuid = "testcpu",
> > \t.event_table = {
> > -\t\t.entries = pmu_events__test_soc_cpu,
> > -\t\t.length = ARRAY_SIZE(pmu_events__test_soc_cpu),
> > +\t\t.pmus = pmu_events__test_soc_cpu,
> > +\t\t.num_pmus = ARRAY_SIZE(pmu_events__test_soc_cpu),
> > \t},
> > \t.metric_table = {
> > -\t\t.entries = pmu_metrics__test_soc_cpu,
> > -\t\t.length = ARRAY_SIZE(pmu_metrics__test_soc_cpu),
> > +\t\t.pmus = pmu_metrics__test_soc_cpu,
> > +\t\t.num_pmus = ARRAY_SIZE(pmu_metrics__test_soc_cpu),
> > \t}
> > },
> > """)
> > @@ -649,12 +700,12 @@ const struct pmu_events_map pmu_events_map[] = {
> > \t.arch = "{arch}",
> > \t.cpuid = "{cpuid}",
> > \t.event_table = {{
> > -\t\t.entries = {event_tblname},
> > -\t\t.length = {event_size}
> > +\t\t.pmus = {event_tblname},
> > +\t\t.num_pmus = {event_size}
> > \t}},
> > \t.metric_table = {{
> > -\t\t.entries = {metric_tblname},
> > -\t\t.length = {metric_size}
> > +\t\t.pmus = {metric_tblname},
> > +\t\t.num_pmus = {metric_size}
> > \t}}
> > }},
> > """)
> > @@ -685,15 +736,15 @@ static const struct pmu_sys_events pmu_sys_event_tables[] = {
> > for tblname in _sys_event_tables:
> > _args.output_file.write(f"""\t{{
> > \t\t.event_table = {{
> > -\t\t\t.entries = {tblname},
> > -\t\t\t.length = ARRAY_SIZE({tblname})
> > +\t\t\t.pmus = {tblname},
> > +\t\t\t.num_pmus = ARRAY_SIZE({tblname})
> > \t\t}},""")
> > metric_tblname = _sys_event_table_to_metric_table_mapping[tblname]
> > if metric_tblname in _sys_metric_tables:
> > _args.output_file.write(f"""
> > \t\t.metric_table = {{
> > -\t\t\t.entries = {metric_tblname},
> > -\t\t\t.length = ARRAY_SIZE({metric_tblname})
> > +\t\t\t.pmus = {metric_tblname},
> > +\t\t\t.num_pmus = ARRAY_SIZE({metric_tblname})
> > \t\t}},""")
> > printed_metric_tables.append(metric_tblname)
> > _args.output_file.write(f"""
> > @@ -753,18 +804,56 @@ static void decompress_metric(int offset, struct pmu_metric *pm)
> > _args.output_file.write('\twhile (*p++);')
> > _args.output_file.write("""}
> >
> > +static int pmu_events_table__for_each_event_pmu(const struct pmu_events_table *table,
> > + const struct pmu_table_entry *pmu,
> > + pmu_event_iter_fn fn,
> > + void *data)
> > +{
> > + int ret;
> > + struct pmu_event pe = {
> > + .pmu = &big_c_string[pmu->pmu_name.offset],
> > + };
> > +
> > + for (uint32_t i = 0; i < pmu->num_entries; i++) {
> > + decompress_event(pmu->entries[i].offset, &pe);
> > + if (!pe.name)
> > + continue;
> > + ret = fn(&pe, table, data);
> > + if (ret)
> > + return ret;
> > + }
> > + return 0;
> > + }
> > +
> > int pmu_events_table__for_each_event(const struct pmu_events_table *table,
> > pmu_event_iter_fn fn,
> > void *data)
> > {
> > - for (size_t i = 0; i < table->length; i++) {
> > - struct pmu_event pe;
> > - int ret;
> > + for (size_t i = 0; i < table->num_pmus; i++) {
> > + int ret = pmu_events_table__for_each_event_pmu(table, &table->pmus[i],
> > + fn, data);
> >
> > - decompress_event(table->entries[i].offset, &pe);
> > - if (!pe.name)
> > + if (ret)
> > + return ret;
> > + }
> > + return 0;
> > +}
> > +
> > +static int pmu_metrics_table__for_each_metric_pmu(const struct pmu_metrics_table *table,
> > + const struct pmu_table_entry *pmu,
> > + pmu_metric_iter_fn fn,
> > + void *data)
> > +{
> > + int ret;
> > + struct pmu_metric pm = {
> > + .pmu = &big_c_string[pmu->pmu_name.offset],
> > + };
> > +
> > + for (uint32_t i = 0; i < pmu->num_entries; i++) {
> > + decompress_metric(pmu->entries[i].offset, &pm);
> > + if (!pm.metric_expr)
> > continue;
> > - ret = fn(&pe, table, data);
> > + ret = fn(&pm, table, data);
> > if (ret)
> > return ret;
> > }
> > @@ -775,14 +864,10 @@ int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table,
> > pmu_metric_iter_fn fn,
> > void *data)
> > {
> > - for (size_t i = 0; i < table->length; i++) {
> > - struct pmu_metric pm;
> > - int ret;
> > + for (size_t i = 0; i < table->num_pmus; i++) {
> > + int ret = pmu_metrics_table__for_each_metric_pmu(table, &table->pmus[i],
> > + fn, data);
> >
> > - decompress_metric(table->entries[i].offset, &pm);
> > - if (!pm.metric_expr)
> > - continue;
> > - ret = fn(&pm, table, data);
> > if (ret)
> > return ret;
> > }
> > @@ -1010,7 +1095,13 @@ such as "arm/cortex-a34".''',
> > #include <stddef.h>
> >
> > struct compact_pmu_event {
> > - int offset;
> > + int offset;
> > +};
> > +
> > +struct pmu_table_entry {
> > + const struct compact_pmu_event *entries;
> > + uint32_t num_entries;
> > + struct compact_pmu_event pmu_name;
> > };
> >
> > """)
> > diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c
> > index 5f541eadc088..0b6efabc3d20 100644
> > --- a/tools/perf/tests/pmu-events.c
> > +++ b/tools/perf/tests/pmu-events.c
> > @@ -44,6 +44,7 @@ struct perf_pmu_test_pmu {
> >
> > static const struct perf_pmu_test_event bp_l1_btb_correct = {
> > .event = {
> > + .pmu = "cpu",
> > .name = "bp_l1_btb_correct",
> > .event = "event=0x8a",
> > .desc = "L1 BTB Correction",
> > @@ -55,6 +56,7 @@ static const struct perf_pmu_test_event bp_l1_btb_correct = {
> >
> > static const struct perf_pmu_test_event bp_l2_btb_correct = {
> > .event = {
> > + .pmu = "cpu",
> > .name = "bp_l2_btb_correct",
> > .event = "event=0x8b",
> > .desc = "L2 BTB Correction",
> > @@ -66,6 +68,7 @@ static const struct perf_pmu_test_event bp_l2_btb_correct = {
> >
> > static const struct perf_pmu_test_event segment_reg_loads_any = {
> > .event = {
> > + .pmu = "cpu",
> > .name = "segment_reg_loads.any",
> > .event = "event=0x6,period=200000,umask=0x80",
> > .desc = "Number of segment register loads",
> > @@ -77,6 +80,7 @@ static const struct perf_pmu_test_event segment_reg_loads_any = {
> >
> > static const struct perf_pmu_test_event dispatch_blocked_any = {
> > .event = {
> > + .pmu = "cpu",
> > .name = "dispatch_blocked.any",
> > .event = "event=0x9,period=200000,umask=0x20",
> > .desc = "Memory cluster signals to block micro-op dispatch for any reason",
> > @@ -88,6 +92,7 @@ static const struct perf_pmu_test_event dispatch_blocked_any = {
> >
> > static const struct perf_pmu_test_event eist_trans = {
> > .event = {
> > + .pmu = "cpu",
> > .name = "eist_trans",
> > .event = "event=0x3a,period=200000,umask=0x0",
> > .desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions",
> > @@ -99,6 +104,7 @@ static const struct perf_pmu_test_event eist_trans = {
> >
> > static const struct perf_pmu_test_event l3_cache_rd = {
> > .event = {
> > + .pmu = "cpu",
> > .name = "l3_cache_rd",
> > .event = "event=0x40",
> > .desc = "L3 cache access, read",
> > @@ -123,7 +129,7 @@ static const struct perf_pmu_test_event uncore_hisi_ddrc_flux_wcmd = {
> > .event = {
> > .name = "uncore_hisi_ddrc.flux_wcmd",
> > .event = "event=0x2",
> > - .desc = "DDRC write commands. Unit: hisi_sccl,ddrc ",
> > + .desc = "DDRC write commands. Unit: hisi_sccl,ddrc",
> > .topic = "uncore",
> > .long_desc = "DDRC write commands",
> > .pmu = "hisi_sccl,ddrc",
> > @@ -137,7 +143,7 @@ static const struct perf_pmu_test_event unc_cbo_xsnp_response_miss_eviction = {
> > .event = {
> > .name = "unc_cbo_xsnp_response.miss_eviction",
> > .event = "event=0x22,umask=0x81",
> > - .desc = "A cross-core snoop resulted from L3 Eviction which misses in some processor core. Unit: uncore_cbox ",
> > + .desc = "A cross-core snoop resulted from L3 Eviction which misses in some processor core. Unit: uncore_cbox",
> > .topic = "uncore",
> > .long_desc = "A cross-core snoop resulted from L3 Eviction which misses in some processor core",
> > .pmu = "uncore_cbox",
> > @@ -151,7 +157,7 @@ static const struct perf_pmu_test_event uncore_hyphen = {
> > .event = {
> > .name = "event-hyphen",
> > .event = "event=0xe0,umask=0x00",
> > - .desc = "UNC_CBO_HYPHEN. Unit: uncore_cbox ",
> > + .desc = "UNC_CBO_HYPHEN. Unit: uncore_cbox",
> > .topic = "uncore",
> > .long_desc = "UNC_CBO_HYPHEN",
> > .pmu = "uncore_cbox",
> > @@ -165,7 +171,7 @@ static const struct perf_pmu_test_event uncore_two_hyph = {
> > .event = {
> > .name = "event-two-hyph",
> > .event = "event=0xc0,umask=0x00",
> > - .desc = "UNC_CBO_TWO_HYPH. Unit: uncore_cbox ",
> > + .desc = "UNC_CBO_TWO_HYPH. Unit: uncore_cbox",
> > .topic = "uncore",
> > .long_desc = "UNC_CBO_TWO_HYPH",
> > .pmu = "uncore_cbox",
> > @@ -179,7 +185,7 @@ static const struct perf_pmu_test_event uncore_hisi_l3c_rd_hit_cpipe = {
> > .event = {
> > .name = "uncore_hisi_l3c.rd_hit_cpipe",
> > .event = "event=0x7",
> > - .desc = "Total read hits. Unit: hisi_sccl,l3c ",
> > + .desc = "Total read hits. Unit: hisi_sccl,l3c",
> > .topic = "uncore",
> > .long_desc = "Total read hits",
> > .pmu = "hisi_sccl,l3c",
> > @@ -193,7 +199,7 @@ static const struct perf_pmu_test_event uncore_imc_free_running_cache_miss = {
> > .event = {
> > .name = "uncore_imc_free_running.cache_miss",
> > .event = "event=0x12",
> > - .desc = "Total cache misses. Unit: uncore_imc_free_running ",
> > + .desc = "Total cache misses. Unit: uncore_imc_free_running",
> > .topic = "uncore",
> > .long_desc = "Total cache misses",
> > .pmu = "uncore_imc_free_running",
> > @@ -207,7 +213,7 @@ static const struct perf_pmu_test_event uncore_imc_cache_hits = {
> > .event = {
> > .name = "uncore_imc.cache_hits",
> > .event = "event=0x34",
> > - .desc = "Total cache hits. Unit: uncore_imc ",
> > + .desc = "Total cache hits. Unit: uncore_imc",
> > .topic = "uncore",
> > .long_desc = "Total cache hits",
> > .pmu = "uncore_imc",
> > @@ -232,13 +238,13 @@ static const struct perf_pmu_test_event sys_ddr_pmu_write_cycles = {
> > .event = {
> > .name = "sys_ddr_pmu.write_cycles",
> > .event = "event=0x2b",
> > - .desc = "ddr write-cycles event. Unit: uncore_sys_ddr_pmu ",
> > + .desc = "ddr write-cycles event. Unit: uncore_sys_ddr_pmu",
> > .topic = "uncore",
> > .pmu = "uncore_sys_ddr_pmu",
> > .compat = "v8",
> > },
> > .alias_str = "event=0x2b",
> > - .alias_long_desc = "ddr write-cycles event. Unit: uncore_sys_ddr_pmu ",
> > + .alias_long_desc = "ddr write-cycles event. Unit: uncore_sys_ddr_pmu",
> > .matching_pmu = "uncore_sys_ddr_pmu",
> > };
> >
> > @@ -246,13 +252,13 @@ static const struct perf_pmu_test_event sys_ccn_pmu_read_cycles = {
> > .event = {
> > .name = "sys_ccn_pmu.read_cycles",
> > .event = "config=0x2c",
> > - .desc = "ccn read-cycles event. Unit: uncore_sys_ccn_pmu ",
> > + .desc = "ccn read-cycles event. Unit: uncore_sys_ccn_pmu",
> > .topic = "uncore",
> > .pmu = "uncore_sys_ccn_pmu",
> > .compat = "0x01",
> > },
> > .alias_str = "config=0x2c",
> > - .alias_long_desc = "ccn read-cycles event. Unit: uncore_sys_ccn_pmu ",
> > + .alias_long_desc = "ccn read-cycles event. Unit: uncore_sys_ccn_pmu",
> > .matching_pmu = "uncore_sys_ccn_pmu",
> > };
> >
> > @@ -403,7 +409,7 @@ static int test__pmu_event_table_core_callback(const struct pmu_event *pe,
> > struct perf_pmu_test_event const **test_event_table;
> > bool found = false;
> >
> > - if (pe->pmu)
> > + if (strcmp(pe->pmu, "cpu"))
> > test_event_table = &uncore_events[0];
> > else
> > test_event_table = &core_events[0];