Re: [PATCH v3] perf evsel: Fix missing exclude_{host,guest} setting

From: Jiri Olsa
Date: Tue Nov 02 2021 - 10:10:23 EST


On Fri, Oct 29, 2021 at 03:49:29PM -0700, Namhyung Kim wrote:
> The current logic for the perf missing feature detection has a bug: it
> can wrongly clear some modifiers like G or H. Actually some PMUs don't
> support any filtering or exclusion while others do. But we check this
> as a global feature.
>
> For example, the cycles event can have 'G' modifier to enable it only
> in the guest mode on x86. When you don't run any VMs it'll return 0.
>
> # perf stat -a -e cycles:G sleep 1
>
> Performance counter stats for 'system wide':
>
> 0 cycles:G
>
> 1.000721670 seconds time elapsed
>
> But when it's used with other pmu events that don't support the G modifier,
> the modifier will be reset and the event will return non-zero values.
>
> # perf stat -a -e cycles:G,msr/tsc/ sleep 1
>
> Performance counter stats for 'system wide':
>
> 538,029,960 cycles:G
> 16,924,010,738 msr/tsc/
>
> 1.001815327 seconds time elapsed
>
> This is because of the missing feature detection logic being global.
> Add a hashmap to set pmu-specific exclude_host/guest features.
>
> Reported-by: Stephane Eranian <eranian@xxxxxxxxxx>
> Signed-off-by: Namhyung Kim <namhyung@xxxxxxxxxx>
> ---
> v3 changes)
> * check memory allocation failure
> * add more NULL checks

we were discussing this with Arnaldo yesterday and he had an idea to use
the evsel->pmu link to store this info instead of a hash.. I first thought we
needed 'evsel' related data, but after giving it some thought I think that
might actually work

my argument was the following use case:

cycles:G,instructions:G,pmu/bla1/:G,pmu/bla2/

that we would falsely clear pmu/bla1/:G if we used the 'evsel->pmu' data..
but then I realized it's detecting whether the pmu supports :G, and so if
that support is not there, none of that pmu's events should have it

thoughts?

thanks,
jirka


>
> v2 changes)
> * change to enum perf_missing_pmu_features
> * pass NULL to hashmap__find() to skip checking
> * add a blank line after declaration
>
> tools/perf/util/evsel.c | 54 ++++++++++++++++++++++++++++++++++++-----
> tools/perf/util/evsel.h | 7 ++++++
> 2 files changed, 55 insertions(+), 6 deletions(-)
>
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index dbfeceb2546c..d3ff4809627b 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -1434,6 +1434,10 @@ void evsel__delete(struct evsel *evsel)
> {
> evsel__exit(evsel);
> free(evsel);
> +
> + /* just free it for the first evsel */
> + hashmap__free(perf_missing_features.pmu);
> + perf_missing_features.pmu = NULL;
> }
>
> void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
> @@ -1791,6 +1795,23 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
> return 0;
> }
>
> +#define PMU_HASH_BITS 4
> +
> +static size_t pmu_hash(const void *key, void *ctx __maybe_unused)
> +{
> + const struct evsel *evsel = key;
> +
> + return hash_bits(evsel->core.attr.type, PMU_HASH_BITS);
> +}
> +
> +static bool pmu_equal(const void *key1, const void *key2, void *ctx __maybe_unused)
> +{
> + const struct evsel *a = key1;
> + const struct evsel *b = key2;
> +
> + return a->core.attr.type == b->core.attr.type;
> +}
> +
> static void evsel__disable_missing_features(struct evsel *evsel)
> {
> if (perf_missing_features.weight_struct) {
> @@ -1807,8 +1828,14 @@ static void evsel__disable_missing_features(struct evsel *evsel)
> evsel->open_flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC;
> if (perf_missing_features.mmap2)
> evsel->core.attr.mmap2 = 0;
> - if (perf_missing_features.exclude_guest)
> - evsel->core.attr.exclude_guest = evsel->core.attr.exclude_host = 0;
> + if (perf_missing_features.exclude_guest) {
> + /* we only have EXCLUDE_GUEST bit, let's skip checking */
> + if (perf_missing_features.pmu != NULL &&
> + hashmap__find(perf_missing_features.pmu, evsel, NULL)) {
> + evsel->core.attr.exclude_guest = 0;
> + evsel->core.attr.exclude_host = 0;
> + }
> + }
> if (perf_missing_features.lbr_flags)
> evsel->core.attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS |
> PERF_SAMPLE_BRANCH_NO_CYCLES);
> @@ -1840,6 +1867,14 @@ int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
>
> bool evsel__detect_missing_features(struct evsel *evsel)
> {
> + if (perf_missing_features.pmu == NULL) {
> + perf_missing_features.pmu = hashmap__new(pmu_hash, pmu_equal, NULL);
> + if (IS_ERR(perf_missing_features.pmu)) {
> + pr_err("Memory allocation failure!\n");
> + perf_missing_features.pmu = NULL;
> + }
> + }
> +
> /*
> * Must probe features in the order they were added to the
> * perf_event_attr interface.
> @@ -1900,10 +1935,17 @@ bool evsel__detect_missing_features(struct evsel *evsel)
> perf_missing_features.mmap2 = true;
> pr_debug2_peo("switching off mmap2\n");
> return true;
> - } else if (!perf_missing_features.exclude_guest &&
> - (evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host)) {
> - perf_missing_features.exclude_guest = true;
> - pr_debug2_peo("switching off exclude_guest, exclude_host\n");
> + } else if ((evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host) &&
> + perf_missing_features.pmu != NULL &&
> + !hashmap__find(perf_missing_features.pmu, evsel, NULL)) {
> + uintptr_t pmu_features = PERF_MISSING_PMU_EXCLUDE_GUEST;
> +
> + hashmap__add(perf_missing_features.pmu, evsel, (void *)pmu_features);
> +
> + if (!perf_missing_features.exclude_guest) {
> + perf_missing_features.exclude_guest = true;
> + pr_debug2_peo("switching off exclude_guest, exclude_host\n");
> + }
> return true;
> } else if (!perf_missing_features.sample_id_all) {
> perf_missing_features.sample_id_all = true;
> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> index 1f7edfa8568a..11b5ece19f0e 100644
> --- a/tools/perf/util/evsel.h
> +++ b/tools/perf/util/evsel.h
> @@ -172,6 +172,13 @@ struct perf_missing_features {
> bool data_page_size;
> bool code_page_size;
> bool weight_struct;
> +
> + /* contains enum perf_missing_pmu_features below */
> + struct hashmap *pmu;
> +};
> +
> +enum perf_missing_pmu_features {
> + PERF_MISSING_PMU_EXCLUDE_GUEST = 1UL << 0,
> };
>
> extern struct perf_missing_features perf_missing_features;
> --
> 2.33.1.1089.g2158813163f-goog
>