Re: [PATCH v1 01/23] perf tools: Warn if no user requested CPUs match PMU's CPUs

From: Ian Rogers
Date: Mon May 22 2023 - 00:53:07 EST


On Sun, May 21, 2023 at 12:04 PM Liang, Kan <kan.liang@xxxxxxxxxxxxxxx> wrote:
>
>
>
> On 2023-05-17 10:57 a.m., Ian Rogers wrote:
> > In commit 1d3351e631fc ("perf tools: Enable on a list of CPUs for hybrid")
> > perf on hybrid will warn if a user requested CPU doesn't match the PMU
> > of the given event but only for hybrid PMUs. Make the logic generic
> > for all PMUs and remove the hybrid logic.
>
> The logic may not work for all PMUs, especially for uncore.
>
> For uncore, the user should be able to read counters from any CPU of a
> socket/die, not just the CPU from the cpumask.
> Please see d6a2f9035bfc ("perf/core: Introduce PMU_EV_CAP_READ_ACTIVE_PKG").
>
> Thanks,
> Kan

Hi Kan,

the code specifically handles this and for uncore PMUs the check is
that the CPU is any online CPU:

"""
... Uncore PMUs list a CPU in sysfs, but this may be overwritten by a
+ * user requested CPU and so any online CPU is applicable. Core PMUs handle
+ * events on the CPUs in their list and otherwise the event isn't
supported.
...
+ to_test = pmu && pmu->is_uncore ? cpu_map__online() :
evsel__cpus(pos);
...
"""

in the commit message this is also called out:

"""
For example on a 16 (0-15) CPU system:
```
$ perf stat -e imc_free_running/data_read/,cycles -C 16 true
WARNING: Requested CPU(s) '16' not supported by PMU
'uncore_imc_free_running_1' for event 'imc_free_running/data_read/'
WARNING: Requested CPU(s) '16' not supported by PMU
'uncore_imc_free_running_0' for event 'imc_free_running/data_read/'
WARNING: Requested CPU(s) '16' not supported by PMU 'cpu' for event 'cycles'
...
"""
CPU 16 is invalid, only 0 to 15 are valid, and so there are warnings
for the uncore events for "imc_free_running/data_read/".

Thanks,
Ian

> >
> > Warn if a CPU is requested that is offline for uncore events. Warn if
> > a CPU is requested for a core PMU, but the CPU isn't within the cpu
> > map of that PMU.
> >
> > For example on a 16 (0-15) CPU system:
> > ```
> > $ perf stat -e imc_free_running/data_read/,cycles -C 16 true
> > WARNING: Requested CPU(s) '16' not supported by PMU 'uncore_imc_free_running_1' for event 'imc_free_running/data_read/'
> > WARNING: Requested CPU(s) '16' not supported by PMU 'uncore_imc_free_running_0' for event 'imc_free_running/data_read/'
> > WARNING: Requested CPU(s) '16' not supported by PMU 'cpu' for event 'cycles'
> >
> > Performance counter stats for 'CPU(s) 16':
> >
> > <not supported> MiB imc_free_running/data_read/
> > <not supported> cycles
> >
> > 0.000570094 seconds time elapsed
> > ```
> >
> > Signed-off-by: Ian Rogers <irogers@xxxxxxxxxx>
> > ---
> > tools/perf/builtin-record.c | 6 +--
> > tools/perf/builtin-stat.c | 5 +--
> > tools/perf/util/cpumap.h | 2 +-
> > tools/perf/util/evlist-hybrid.c | 74 ---------------------------------
> > tools/perf/util/evlist-hybrid.h | 1 -
> > tools/perf/util/evlist.c | 44 ++++++++++++++++++++
> > tools/perf/util/evlist.h | 2 +
> > tools/perf/util/pmu.c | 33 ---------------
> > tools/perf/util/pmu.h | 4 --
> > 9 files changed, 49 insertions(+), 122 deletions(-)
> >
> > diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> > index ec0f2d5f189f..9d212236c75a 100644
> > --- a/tools/perf/builtin-record.c
> > +++ b/tools/perf/builtin-record.c
> > @@ -4198,11 +4198,7 @@ int cmd_record(int argc, const char **argv)
> > /* Enable ignoring missing threads when -u/-p option is defined. */
> > rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
> >
> > - if (evlist__fix_hybrid_cpus(rec->evlist, rec->opts.target.cpu_list)) {
> > - pr_err("failed to use cpu list %s\n",
> > - rec->opts.target.cpu_list);
> > - goto out;
> > - }
> > + evlist__warn_user_requested_cpus(rec->evlist, rec->opts.target.cpu_list);
> >
> > rec->opts.target.hybrid = perf_pmu__has_hybrid();
> >
> > diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
> > index bc45cee3f77c..612467216306 100644
> > --- a/tools/perf/builtin-stat.c
> > +++ b/tools/perf/builtin-stat.c
> > @@ -2462,10 +2462,7 @@ int cmd_stat(int argc, const char **argv)
> > }
> > }
> >
> > - if (evlist__fix_hybrid_cpus(evsel_list, target.cpu_list)) {
> > - pr_err("failed to use cpu list %s\n", target.cpu_list);
> > - goto out;
> > - }
> > + evlist__warn_user_requested_cpus(evsel_list, target.cpu_list);
> >
> > target.hybrid = perf_pmu__has_hybrid();
> > if (evlist__create_maps(evsel_list, &target) < 0) {
> > diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
> > index e3426541e0aa..c1de993c083f 100644
> > --- a/tools/perf/util/cpumap.h
> > +++ b/tools/perf/util/cpumap.h
> > @@ -59,7 +59,7 @@ struct perf_cpu cpu__max_present_cpu(void);
> > /**
> > * cpu_map__is_dummy - Events associated with a pid, rather than a CPU, use a single dummy map with an entry of -1.
> > */
> > -static inline bool cpu_map__is_dummy(struct perf_cpu_map *cpus)
> > +static inline bool cpu_map__is_dummy(const struct perf_cpu_map *cpus)
> > {
> > return perf_cpu_map__nr(cpus) == 1 && perf_cpu_map__cpu(cpus, 0).cpu == -1;
> > }
> > diff --git a/tools/perf/util/evlist-hybrid.c b/tools/perf/util/evlist-hybrid.c
> > index 57f02beef023..db3f5fbdebe1 100644
> > --- a/tools/perf/util/evlist-hybrid.c
> > +++ b/tools/perf/util/evlist-hybrid.c
> > @@ -86,77 +86,3 @@ bool evlist__has_hybrid(struct evlist *evlist)
> >
> > return false;
> > }
> > -
> > -int evlist__fix_hybrid_cpus(struct evlist *evlist, const char *cpu_list)
> > -{
> > - struct perf_cpu_map *cpus;
> > - struct evsel *evsel, *tmp;
> > - struct perf_pmu *pmu;
> > - int ret, unmatched_count = 0, events_nr = 0;
> > -
> > - if (!perf_pmu__has_hybrid() || !cpu_list)
> > - return 0;
> > -
> > - cpus = perf_cpu_map__new(cpu_list);
> > - if (!cpus)
> > - return -1;
> > -
> > - /*
> > - * The evsels are created with hybrid pmu's cpus. But now we
> > - * need to check and adjust the cpus of evsel by cpu_list because
> > - * cpu_list may cause conflicts with cpus of evsel. For example,
> > - * cpus of evsel is cpu0-7, but the cpu_list is cpu6-8, we need
> > - * to adjust the cpus of evsel to cpu6-7. And then propatate maps
> > - * in evlist__create_maps().
> > - */
> > - evlist__for_each_entry_safe(evlist, tmp, evsel) {
> > - struct perf_cpu_map *matched_cpus, *unmatched_cpus;
> > - char buf1[128], buf2[128];
> > -
> > - pmu = perf_pmu__find_hybrid_pmu(evsel->pmu_name);
> > - if (!pmu)
> > - continue;
> > -
> > - ret = perf_pmu__cpus_match(pmu, cpus, &matched_cpus,
> > - &unmatched_cpus);
> > - if (ret)
> > - goto out;
> > -
> > - events_nr++;
> > -
> > - if (perf_cpu_map__nr(matched_cpus) > 0 &&
> > - (perf_cpu_map__nr(unmatched_cpus) > 0 ||
> > - perf_cpu_map__nr(matched_cpus) < perf_cpu_map__nr(cpus) ||
> > - perf_cpu_map__nr(matched_cpus) < perf_cpu_map__nr(pmu->cpus))) {
> > - perf_cpu_map__put(evsel->core.cpus);
> > - perf_cpu_map__put(evsel->core.own_cpus);
> > - evsel->core.cpus = perf_cpu_map__get(matched_cpus);
> > - evsel->core.own_cpus = perf_cpu_map__get(matched_cpus);
> > -
> > - if (perf_cpu_map__nr(unmatched_cpus) > 0) {
> > - cpu_map__snprint(matched_cpus, buf1, sizeof(buf1));
> > - pr_warning("WARNING: use %s in '%s' for '%s', skip other cpus in list.\n",
> > - buf1, pmu->name, evsel->name);
> > - }
> > - }
> > -
> > - if (perf_cpu_map__nr(matched_cpus) == 0) {
> > - evlist__remove(evlist, evsel);
> > - evsel__delete(evsel);
> > -
> > - cpu_map__snprint(cpus, buf1, sizeof(buf1));
> > - cpu_map__snprint(pmu->cpus, buf2, sizeof(buf2));
> > - pr_warning("WARNING: %s isn't a '%s', please use a CPU list in the '%s' range (%s)\n",
> > - buf1, pmu->name, pmu->name, buf2);
> > - unmatched_count++;
> > - }
> > -
> > - perf_cpu_map__put(matched_cpus);
> > - perf_cpu_map__put(unmatched_cpus);
> > - }
> > - if (events_nr)
> > - ret = (unmatched_count == events_nr) ? -1 : 0;
> > -out:
> > - perf_cpu_map__put(cpus);
> > - return ret;
> > -}
> > diff --git a/tools/perf/util/evlist-hybrid.h b/tools/perf/util/evlist-hybrid.h
> > index aacdb1b0f948..19f74b4c340a 100644
> > --- a/tools/perf/util/evlist-hybrid.h
> > +++ b/tools/perf/util/evlist-hybrid.h
> > @@ -10,6 +10,5 @@
> > int evlist__add_default_hybrid(struct evlist *evlist, bool precise);
> > void evlist__warn_hybrid_group(struct evlist *evlist);
> > bool evlist__has_hybrid(struct evlist *evlist);
> > -int evlist__fix_hybrid_cpus(struct evlist *evlist, const char *cpu_list);
> >
> > #endif /* __PERF_EVLIST_HYBRID_H */
> > diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
> > index a0504316b06f..5d0d99127a90 100644
> > --- a/tools/perf/util/evlist.c
> > +++ b/tools/perf/util/evlist.c
> > @@ -2465,3 +2465,47 @@ void evlist__check_mem_load_aux(struct evlist *evlist)
> > }
> > }
> > }
> > +
> > +/**
> > + * evlist__warn_user_requested_cpus() - Check each evsel against requested CPUs
> > + * and warn if the user CPU list is inapplicable for the event's PMUs
> > + * CPUs. Uncore PMUs list a CPU in sysfs, but this may be overwritten by a
> > + * user requested CPU and so any online CPU is applicable. Core PMUs handle
> > + * events on the CPUs in their list and otherwise the event isn't supported.
> > + * @evlist: The list of events being checked.
> > + * @cpu_list: The user provided list of CPUs.
> > + */
> > +void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_list)
> > +{
> > + struct perf_cpu_map *user_requested_cpus;
> > + struct evsel *pos;
> > +
> > + if (!cpu_list)
> > + return;
> > +
> > + user_requested_cpus = perf_cpu_map__new(cpu_list);
> > + if (!user_requested_cpus)
> > + return;
> > +
> > + evlist__for_each_entry(evlist, pos) {
> > + const struct perf_cpu_map *to_test;
> > + struct perf_cpu cpu;
> > + int idx;
> > + bool warn = true;
> > + const struct perf_pmu *pmu = evsel__find_pmu(pos);
> > +
> > + to_test = pmu && pmu->is_uncore ? cpu_map__online() : evsel__cpus(pos);
> > +
> > + perf_cpu_map__for_each_cpu(cpu, idx, to_test) {
> > + if (perf_cpu_map__has(user_requested_cpus, cpu)) {
> > + warn = false;
> > + break;
> > + }
> > + }
> > + if (warn) {
> > + pr_warning("WARNING: Requested CPU(s) '%s' not supported by PMU '%s' for event '%s'\n",
> > + cpu_list, pmu ? pmu->name : "cpu", evsel__name(pos));
> > + }
> > + }
> > + perf_cpu_map__put(user_requested_cpus);
> > +}
> > diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
> > index e7e5540cc970..5e7ff44f3043 100644
> > --- a/tools/perf/util/evlist.h
> > +++ b/tools/perf/util/evlist.h
> > @@ -447,4 +447,6 @@ struct evsel *evlist__find_evsel(struct evlist *evlist, int idx);
> >
> > int evlist__scnprintf_evsels(struct evlist *evlist, size_t size, char *bf);
> > void evlist__check_mem_load_aux(struct evlist *evlist);
> > +void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_list);
> > +
> > #endif /* __PERF_EVLIST_H */
> > diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
> > index f4f0afbc391c..1e0be23d4dd7 100644
> > --- a/tools/perf/util/pmu.c
> > +++ b/tools/perf/util/pmu.c
> > @@ -2038,39 +2038,6 @@ int perf_pmu__match(char *pattern, char *name, char *tok)
> > return 0;
> > }
> >
> > -int perf_pmu__cpus_match(struct perf_pmu *pmu, struct perf_cpu_map *cpus,
> > - struct perf_cpu_map **mcpus_ptr,
> > - struct perf_cpu_map **ucpus_ptr)
> > -{
> > - struct perf_cpu_map *pmu_cpus = pmu->cpus;
> > - struct perf_cpu_map *matched_cpus, *unmatched_cpus;
> > - struct perf_cpu cpu;
> > - int i, matched_nr = 0, unmatched_nr = 0;
> > -
> > - matched_cpus = perf_cpu_map__default_new();
> > - if (!matched_cpus)
> > - return -1;
> > -
> > - unmatched_cpus = perf_cpu_map__default_new();
> > - if (!unmatched_cpus) {
> > - perf_cpu_map__put(matched_cpus);
> > - return -1;
> > - }
> > -
> > - perf_cpu_map__for_each_cpu(cpu, i, cpus) {
> > - if (!perf_cpu_map__has(pmu_cpus, cpu))
> > - RC_CHK_ACCESS(unmatched_cpus)->map[unmatched_nr++] = cpu;
> > - else
> > - RC_CHK_ACCESS(matched_cpus)->map[matched_nr++] = cpu;
> > - }
> > -
> > - perf_cpu_map__set_nr(unmatched_cpus, unmatched_nr);
> > - perf_cpu_map__set_nr(matched_cpus, matched_nr);
> > - *mcpus_ptr = matched_cpus;
> > - *ucpus_ptr = unmatched_cpus;
> > - return 0;
> > -}
> > -
> > double __weak perf_pmu__cpu_slots_per_cycle(void)
> > {
> > return NAN;
> > diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
> > index 0e0cb6283594..49033bb134f3 100644
> > --- a/tools/perf/util/pmu.h
> > +++ b/tools/perf/util/pmu.h
> > @@ -257,10 +257,6 @@ void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu);
> > bool perf_pmu__has_hybrid(void);
> > int perf_pmu__match(char *pattern, char *name, char *tok);
> >
> > -int perf_pmu__cpus_match(struct perf_pmu *pmu, struct perf_cpu_map *cpus,
> > - struct perf_cpu_map **mcpus_ptr,
> > - struct perf_cpu_map **ucpus_ptr);
> > -
> > char *pmu_find_real_name(const char *name);
> > char *pmu_find_alias_name(const char *name);
> > double perf_pmu__cpu_slots_per_cycle(void);