[PATCH] perf stat: Merge uncore events by default for hybrid platform
From: Jin Yao
Date: Wed Jun 16 2021 - 02:31:20 EST
On hybrid platform, by default stat aggregates and reports the event counts
per pmu. For example,
# perf stat -e cycles -a true
Performance counter stats for 'system wide':
1,400,445 cpu_core/cycles/
680,881 cpu_atom/cycles/
0.001770773 seconds time elapsed
While for uncore events, that's not a suitable method. Uncore has nothing
to do with hybrid. So for uncore events, we aggregate event counts from all
PMUs and report the counts without PMUs.
Before:
# perf stat -e arb/event=0x81,umask=0x1/,arb/event=0x84,umask=0x1/ -a true
Performance counter stats for 'system wide':
2,058 uncore_arb_0/event=0x81,umask=0x1/
2,028 uncore_arb_1/event=0x81,umask=0x1/
0 uncore_arb_0/event=0x84,umask=0x1/
0 uncore_arb_1/event=0x84,umask=0x1/
0.000614498 seconds time elapsed
After:
# perf stat -e arb/event=0x81,umask=0x1/,arb/event=0x84,umask=0x1/ -a true
Performance counter stats for 'system wide':
3,996 arb/event=0x81,umask=0x1/
0 arb/event=0x84,umask=0x1/
0.000630046 seconds time elapsed
Of course, we also keep the '--no-merge' still works for uncore events.
# perf stat -e arb/event=0x81,umask=0x1/,arb/event=0x84,umask=0x1/ --no-merge true
Performance counter stats for 'system wide':
1,952 uncore_arb_0/event=0x81,umask=0x1/
1,921 uncore_arb_1/event=0x81,umask=0x1/
0 uncore_arb_0/event=0x84,umask=0x1/
0 uncore_arb_1/event=0x84,umask=0x1/
0.000575536 seconds time elapsed
Signed-off-by: Jin Yao <yao.jin@xxxxxxxxxxxxxxx>
---
tools/perf/builtin-stat.c | 3 ---
tools/perf/util/stat-display.c | 29 +++++++++++++++++++++++++----
2 files changed, 25 insertions(+), 7 deletions(-)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index f9f74a514315..b67a44982b61 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -2442,9 +2442,6 @@ int cmd_stat(int argc, const char **argv)
evlist__check_cpu_maps(evsel_list);
- if (perf_pmu__has_hybrid())
- stat_config.no_merge = true;
-
/*
* Initialize thread_map with comm names,
* so we could print it out on output.
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index b759dfd633b4..c6070f4684ca 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -595,6 +595,19 @@ static void collect_all_aliases(struct perf_stat_config *config, struct evsel *c
}
}
+static bool is_uncore(struct evsel *evsel)
+{
+ struct perf_pmu *pmu;
+
+ if (evsel->pmu_name) {
+ pmu = perf_pmu__find(evsel->pmu_name);
+ if (pmu)
+ return pmu->is_uncore;
+ }
+
+ return false;
+}
+
static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data,
bool first),
@@ -603,10 +616,18 @@ static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
if (counter->merged_stat)
return false;
cb(config, counter, data, true);
- if (config->no_merge)
- uniquify_event_name(counter);
- else if (counter->auto_merge_stats)
- collect_all_aliases(config, counter, cb, data);
+ if (perf_pmu__has_hybrid()) {
+ if (config->no_merge || !is_uncore(counter))
+ uniquify_event_name(counter);
+ else if (counter->auto_merge_stats)
+ collect_all_aliases(config, counter, cb, data);
+ } else {
+ if (config->no_merge)
+ uniquify_event_name(counter);
+ else if (counter->auto_merge_stats)
+ collect_all_aliases(config, counter, cb, data);
+ }
+
return true;
}
--
2.17.1