Re: [PATCH v1 01/23] perf tools: Warn if no user requested CPUs match PMU's CPUs

From: Liang, Kan
Date: Mon May 22 2023 - 07:34:27 EST




On 2023-05-22 12:52 a.m., Ian Rogers wrote:
> On Sun, May 21, 2023 at 12:04 PM Liang, Kan <kan.liang@xxxxxxxxxxxxxxx> wrote:
>>
>>
>>
>> On 2023-05-17 10:57 a.m., Ian Rogers wrote:
>>> In commit 1d3351e631fc ("perf tools: Enable on a list of CPUs for hybrid")
>>> perf on hybrid will warn if a user requested CPU doesn't match the PMU
>>> of the given event but only for hybrid PMUs. Make the logic generic
>>> for all PMUs and remove the hybrid logic.
>>
>> The logic may not work for all PMUs, especially for uncore.
>>
>> For uncore, the user should be able to read counters from any CPU of a
>> socket/die, not just the CPU from the cpumask.
>> Please see d6a2f9035bfc ("perf/core: Introduce PMU_EV_CAP_READ_ACTIVE_PKG").
>>
>> Thanks,
>> Kan
>
> Hi Kan,
>
> the code specifically handles this and for uncore PMUs the check is
> that the CPU is any online CPU:
>
> """
> ... Uncore PMUs list a CPU in sysfs, but this may be overwritten by a
> + * user requested CPU and so any online CPU is applicable. Core PMUs handle
> + * events on the CPUs in their list and otherwise the event isn't
> supported.
> ...
> + to_test = pmu && pmu->is_uncore ? cpu_map__online() :
> evsel__cpus(pos);
> ...
> """
>
> in the commit message this is also called out:
>
> """
> For example on a 16 (0-15) CPU system:
> ```
> $ perf stat -e imc_free_running/data_read/,cycles -C 16 true
> WARNING: Requested CPU(s) '16' not supported by PMU
> 'uncore_imc_free_running_1' for event 'imc_free_running/data_read/'
> WARNING: Requested CPU(s) '16' not supported by PMU
> 'uncore_imc_free_running_0' for event 'imc_free_running/data_read/'
> WARNING: Requested CPU(s) '16' not supported by PMU 'cpu' for event 'cycles'
> ...
> """
> CPU 16 is invalid, only 0 to 15 are valid, and so there are warnings
> for the uncore events for "imc_free_running/data_read/".
>

Ah, I thought CPU 16 is from another socket. For the invalid/offline
CPU, a warning should be OK.

Thanks,
Kan

> Thanks,
> Ian
>
>>>
>>> Warn if a CPU is requested that is offline for uncore events. Warn if
>>> a CPU is requested for a core PMU, but the CPU isn't within the cpu
>>> map of that PMU.
>>>
>>> For example on a 16 (0-15) CPU system:
>>> ```
>>> $ perf stat -e imc_free_running/data_read/,cycles -C 16 true
>>> WARNING: Requested CPU(s) '16' not supported by PMU 'uncore_imc_free_running_1' for event 'imc_free_running/data_read/'
>>> WARNING: Requested CPU(s) '16' not supported by PMU 'uncore_imc_free_running_0' for event 'imc_free_running/data_read/'
>>> WARNING: Requested CPU(s) '16' not supported by PMU 'cpu' for event 'cycles'
>>>
>>> Performance counter stats for 'CPU(s) 16':
>>>
>>> <not supported> MiB imc_free_running/data_read/
>>> <not supported> cycles
>>>
>>> 0.000570094 seconds time elapsed
>>> ```
>>>
>>> Signed-off-by: Ian Rogers <irogers@xxxxxxxxxx>
>>> ---
>>> tools/perf/builtin-record.c | 6 +--
>>> tools/perf/builtin-stat.c | 5 +--
>>> tools/perf/util/cpumap.h | 2 +-
>>> tools/perf/util/evlist-hybrid.c | 74 ---------------------------------
>>> tools/perf/util/evlist-hybrid.h | 1 -
>>> tools/perf/util/evlist.c | 44 ++++++++++++++++++++
>>> tools/perf/util/evlist.h | 2 +
>>> tools/perf/util/pmu.c | 33 ---------------
>>> tools/perf/util/pmu.h | 4 --
>>> 9 files changed, 49 insertions(+), 122 deletions(-)
>>>
>>> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
>>> index ec0f2d5f189f..9d212236c75a 100644
>>> --- a/tools/perf/builtin-record.c
>>> +++ b/tools/perf/builtin-record.c
>>> @@ -4198,11 +4198,7 @@ int cmd_record(int argc, const char **argv)
>>> /* Enable ignoring missing threads when -u/-p option is defined. */
>>> rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
>>>
>>> - if (evlist__fix_hybrid_cpus(rec->evlist, rec->opts.target.cpu_list)) {
>>> - pr_err("failed to use cpu list %s\n",
>>> - rec->opts.target.cpu_list);
>>> - goto out;
>>> - }
>>> + evlist__warn_user_requested_cpus(rec->evlist, rec->opts.target.cpu_list);
>>>
>>> rec->opts.target.hybrid = perf_pmu__has_hybrid();
>>>
>>> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
>>> index bc45cee3f77c..612467216306 100644
>>> --- a/tools/perf/builtin-stat.c
>>> +++ b/tools/perf/builtin-stat.c
>>> @@ -2462,10 +2462,7 @@ int cmd_stat(int argc, const char **argv)
>>> }
>>> }
>>>
>>> - if (evlist__fix_hybrid_cpus(evsel_list, target.cpu_list)) {
>>> - pr_err("failed to use cpu list %s\n", target.cpu_list);
>>> - goto out;
>>> - }
>>> + evlist__warn_user_requested_cpus(evsel_list, target.cpu_list);
>>>
>>> target.hybrid = perf_pmu__has_hybrid();
>>> if (evlist__create_maps(evsel_list, &target) < 0) {
>>> diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
>>> index e3426541e0aa..c1de993c083f 100644
>>> --- a/tools/perf/util/cpumap.h
>>> +++ b/tools/perf/util/cpumap.h
>>> @@ -59,7 +59,7 @@ struct perf_cpu cpu__max_present_cpu(void);
>>> /**
>>> * cpu_map__is_dummy - Events associated with a pid, rather than a CPU, use a single dummy map with an entry of -1.
>>> */
>>> -static inline bool cpu_map__is_dummy(struct perf_cpu_map *cpus)
>>> +static inline bool cpu_map__is_dummy(const struct perf_cpu_map *cpus)
>>> {
>>> return perf_cpu_map__nr(cpus) == 1 && perf_cpu_map__cpu(cpus, 0).cpu == -1;
>>> }
>>> diff --git a/tools/perf/util/evlist-hybrid.c b/tools/perf/util/evlist-hybrid.c
>>> index 57f02beef023..db3f5fbdebe1 100644
>>> --- a/tools/perf/util/evlist-hybrid.c
>>> +++ b/tools/perf/util/evlist-hybrid.c
>>> @@ -86,77 +86,3 @@ bool evlist__has_hybrid(struct evlist *evlist)
>>>
>>> return false;
>>> }
>>> -
>>> -int evlist__fix_hybrid_cpus(struct evlist *evlist, const char *cpu_list)
>>> -{
>>> - struct perf_cpu_map *cpus;
>>> - struct evsel *evsel, *tmp;
>>> - struct perf_pmu *pmu;
>>> - int ret, unmatched_count = 0, events_nr = 0;
>>> -
>>> - if (!perf_pmu__has_hybrid() || !cpu_list)
>>> - return 0;
>>> -
>>> - cpus = perf_cpu_map__new(cpu_list);
>>> - if (!cpus)
>>> - return -1;
>>> -
>>> - /*
>>> - * The evsels are created with hybrid pmu's cpus. But now we
>>> - * need to check and adjust the cpus of evsel by cpu_list because
>>> - * cpu_list may cause conflicts with cpus of evsel. For example,
>>> - * cpus of evsel is cpu0-7, but the cpu_list is cpu6-8, we need
>>> - * to adjust the cpus of evsel to cpu6-7. And then propatate maps
>>> - * in evlist__create_maps().
>>> - */
>>> - evlist__for_each_entry_safe(evlist, tmp, evsel) {
>>> - struct perf_cpu_map *matched_cpus, *unmatched_cpus;
>>> - char buf1[128], buf2[128];
>>> -
>>> - pmu = perf_pmu__find_hybrid_pmu(evsel->pmu_name);
>>> - if (!pmu)
>>> - continue;
>>> -
>>> - ret = perf_pmu__cpus_match(pmu, cpus, &matched_cpus,
>>> - &unmatched_cpus);
>>> - if (ret)
>>> - goto out;
>>> -
>>> - events_nr++;
>>> -
>>> - if (perf_cpu_map__nr(matched_cpus) > 0 &&
>>> - (perf_cpu_map__nr(unmatched_cpus) > 0 ||
>>> - perf_cpu_map__nr(matched_cpus) < perf_cpu_map__nr(cpus) ||
>>> - perf_cpu_map__nr(matched_cpus) < perf_cpu_map__nr(pmu->cpus))) {
>>> - perf_cpu_map__put(evsel->core.cpus);
>>> - perf_cpu_map__put(evsel->core.own_cpus);
>>> - evsel->core.cpus = perf_cpu_map__get(matched_cpus);
>>> - evsel->core.own_cpus = perf_cpu_map__get(matched_cpus);
>>> -
>>> - if (perf_cpu_map__nr(unmatched_cpus) > 0) {
>>> - cpu_map__snprint(matched_cpus, buf1, sizeof(buf1));
>>> - pr_warning("WARNING: use %s in '%s' for '%s', skip other cpus in list.\n",
>>> - buf1, pmu->name, evsel->name);
>>> - }
>>> - }
>>> -
>>> - if (perf_cpu_map__nr(matched_cpus) == 0) {
>>> - evlist__remove(evlist, evsel);
>>> - evsel__delete(evsel);
>>> -
>>> - cpu_map__snprint(cpus, buf1, sizeof(buf1));
>>> - cpu_map__snprint(pmu->cpus, buf2, sizeof(buf2));
>>> - pr_warning("WARNING: %s isn't a '%s', please use a CPU list in the '%s' range (%s)\n",
>>> - buf1, pmu->name, pmu->name, buf2);
>>> - unmatched_count++;
>>> - }
>>> -
>>> - perf_cpu_map__put(matched_cpus);
>>> - perf_cpu_map__put(unmatched_cpus);
>>> - }
>>> - if (events_nr)
>>> - ret = (unmatched_count == events_nr) ? -1 : 0;
>>> -out:
>>> - perf_cpu_map__put(cpus);
>>> - return ret;
>>> -}
>>> diff --git a/tools/perf/util/evlist-hybrid.h b/tools/perf/util/evlist-hybrid.h
>>> index aacdb1b0f948..19f74b4c340a 100644
>>> --- a/tools/perf/util/evlist-hybrid.h
>>> +++ b/tools/perf/util/evlist-hybrid.h
>>> @@ -10,6 +10,5 @@
>>> int evlist__add_default_hybrid(struct evlist *evlist, bool precise);
>>> void evlist__warn_hybrid_group(struct evlist *evlist);
>>> bool evlist__has_hybrid(struct evlist *evlist);
>>> -int evlist__fix_hybrid_cpus(struct evlist *evlist, const char *cpu_list);
>>>
>>> #endif /* __PERF_EVLIST_HYBRID_H */
>>> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
>>> index a0504316b06f..5d0d99127a90 100644
>>> --- a/tools/perf/util/evlist.c
>>> +++ b/tools/perf/util/evlist.c
>>> @@ -2465,3 +2465,47 @@ void evlist__check_mem_load_aux(struct evlist *evlist)
>>> }
>>> }
>>> }
>>> +
>>> +/**
>>> + * evlist__warn_user_requested_cpus() - Check each evsel against requested CPUs
>>> + * and warn if the user CPU list is inapplicable for the event's PMUs
>>> + * CPUs. Uncore PMUs list a CPU in sysfs, but this may be overwritten by a
>>> + * user requested CPU and so any online CPU is applicable. Core PMUs handle
>>> + * events on the CPUs in their list and otherwise the event isn't supported.
>>> + * @evlist: The list of events being checked.
>>> + * @cpu_list: The user provided list of CPUs.
>>> + */
>>> +void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_list)
>>> +{
>>> + struct perf_cpu_map *user_requested_cpus;
>>> + struct evsel *pos;
>>> +
>>> + if (!cpu_list)
>>> + return;
>>> +
>>> + user_requested_cpus = perf_cpu_map__new(cpu_list);
>>> + if (!user_requested_cpus)
>>> + return;
>>> +
>>> + evlist__for_each_entry(evlist, pos) {
>>> + const struct perf_cpu_map *to_test;
>>> + struct perf_cpu cpu;
>>> + int idx;
>>> + bool warn = true;
>>> + const struct perf_pmu *pmu = evsel__find_pmu(pos);
>>> +
>>> + to_test = pmu && pmu->is_uncore ? cpu_map__online() : evsel__cpus(pos);
>>> +
>>> + perf_cpu_map__for_each_cpu(cpu, idx, to_test) {
>>> + if (perf_cpu_map__has(user_requested_cpus, cpu)) {
>>> + warn = false;
>>> + break;
>>> + }
>>> + }
>>> + if (warn) {
>>> + pr_warning("WARNING: Requested CPU(s) '%s' not supported by PMU '%s' for event '%s'\n",
>>> + cpu_list, pmu ? pmu->name : "cpu", evsel__name(pos));
>>> + }
>>> + }
>>> + perf_cpu_map__put(user_requested_cpus);
>>> +}
>>> diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
>>> index e7e5540cc970..5e7ff44f3043 100644
>>> --- a/tools/perf/util/evlist.h
>>> +++ b/tools/perf/util/evlist.h
>>> @@ -447,4 +447,6 @@ struct evsel *evlist__find_evsel(struct evlist *evlist, int idx);
>>>
>>> int evlist__scnprintf_evsels(struct evlist *evlist, size_t size, char *bf);
>>> void evlist__check_mem_load_aux(struct evlist *evlist);
>>> +void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_list);
>>> +
>>> #endif /* __PERF_EVLIST_H */
>>> diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
>>> index f4f0afbc391c..1e0be23d4dd7 100644
>>> --- a/tools/perf/util/pmu.c
>>> +++ b/tools/perf/util/pmu.c
>>> @@ -2038,39 +2038,6 @@ int perf_pmu__match(char *pattern, char *name, char *tok)
>>> return 0;
>>> }
>>>
>>> -int perf_pmu__cpus_match(struct perf_pmu *pmu, struct perf_cpu_map *cpus,
>>> - struct perf_cpu_map **mcpus_ptr,
>>> - struct perf_cpu_map **ucpus_ptr)
>>> -{
>>> - struct perf_cpu_map *pmu_cpus = pmu->cpus;
>>> - struct perf_cpu_map *matched_cpus, *unmatched_cpus;
>>> - struct perf_cpu cpu;
>>> - int i, matched_nr = 0, unmatched_nr = 0;
>>> -
>>> - matched_cpus = perf_cpu_map__default_new();
>>> - if (!matched_cpus)
>>> - return -1;
>>> -
>>> - unmatched_cpus = perf_cpu_map__default_new();
>>> - if (!unmatched_cpus) {
>>> - perf_cpu_map__put(matched_cpus);
>>> - return -1;
>>> - }
>>> -
>>> - perf_cpu_map__for_each_cpu(cpu, i, cpus) {
>>> - if (!perf_cpu_map__has(pmu_cpus, cpu))
>>> - RC_CHK_ACCESS(unmatched_cpus)->map[unmatched_nr++] = cpu;
>>> - else
>>> - RC_CHK_ACCESS(matched_cpus)->map[matched_nr++] = cpu;
>>> - }
>>> -
>>> - perf_cpu_map__set_nr(unmatched_cpus, unmatched_nr);
>>> - perf_cpu_map__set_nr(matched_cpus, matched_nr);
>>> - *mcpus_ptr = matched_cpus;
>>> - *ucpus_ptr = unmatched_cpus;
>>> - return 0;
>>> -}
>>> -
>>> double __weak perf_pmu__cpu_slots_per_cycle(void)
>>> {
>>> return NAN;
>>> diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
>>> index 0e0cb6283594..49033bb134f3 100644
>>> --- a/tools/perf/util/pmu.h
>>> +++ b/tools/perf/util/pmu.h
>>> @@ -257,10 +257,6 @@ void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu);
>>> bool perf_pmu__has_hybrid(void);
>>> int perf_pmu__match(char *pattern, char *name, char *tok);
>>>
>>> -int perf_pmu__cpus_match(struct perf_pmu *pmu, struct perf_cpu_map *cpus,
>>> - struct perf_cpu_map **mcpus_ptr,
>>> - struct perf_cpu_map **ucpus_ptr);
>>> -
>>> char *pmu_find_real_name(const char *name);
>>> char *pmu_find_alias_name(const char *name);
>>> double perf_pmu__cpu_slots_per_cycle(void);