[PATCH] perf stat: Support metrics with perf stat --per-thread
From: Jin Yao
Date: Wed Oct 11 2017 - 21:09:04 EST
perf stat --per-thread doesn't support outputting metrics, like IPC.
We should support this because it makes it easy to collect metrics
for the individual threads of an application.
1. Current output, for example:
root@skl:/tmp# perf stat --per-thread -p 21623
^C
Performance counter stats for process id '21623':
vmstat-21623 0.517479 task-clock (msec) # 0.000 CPUs utilized
vmstat-21623 1 context-switches
vmstat-21623 0 cpu-migrations
vmstat-21623 0 page-faults
vmstat-21623 461,306 cycles
vmstat-21623 630,724 instructions
vmstat-21623 136,265 branches
vmstat-21623 2,520 branch-misses
1.444020756 seconds time elapsed
root@skl:/tmp# perf stat --per-thread --metrics ipc -p 21623
^C
Performance counter stats for process id '21623':
vmstat-21623 631,185 inst_retired.any
vmstat-21623 605,893 cpu_clk_unhalted.thread
1.415679293 seconds time elapsed
2. With this patch, the result would be:
root@skl:/tmp# perf stat --per-thread -p 21623
^C
Performance counter stats for process id '21623':
vmstat-21623 0.533759 task-clock (msec) # 0.000 CPUs utilized
vmstat-21623 1 context-switches # 0.002 M/sec
vmstat-21623 0 cpu-migrations # 0.000 K/sec
vmstat-21623 0 page-faults # 0.000 K/sec
vmstat-21623 473,896 cycles # 0.888 GHz
vmstat-21623 631,072 instructions # 1.33 insn per cycle
vmstat-21623 136,307 branches # 255.372 M/sec
vmstat-21623 2,524 branch-misses # 1.85% of all branches
1.544862861 seconds time elapsed
root@skl:/tmp# perf stat --per-thread --metrics ipc -p 21623
^C
Performance counter stats for process id '21623':
vmstat-21623 1,259,104 inst_retired.any # 1.2 IPC
vmstat-21623 1,056,756 cpu_clk_unhalted.thread
2.040954502 seconds time elapsed
Signed-off-by: Jin Yao <yao.jin@xxxxxxxxxxxxxxx>
---
tools/perf/util/stat.c | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 35e9848..1164e68 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -314,6 +314,26 @@ static int process_counter_maps(struct perf_stat_config *config,
return 0;
}
+/* Sum each thread's count over all CPUs, scale it and update the shadow stats. */
+static int process_aggr_thread_counter(struct perf_evsel *counter)
+{
+	const int nr_threads = thread_map__nr(counter->threads);
+	const int nr_cpus = cpu_map__nr(counter->cpus);
+	int t, c;
+
+	for (t = 0; t < nr_threads; t++) {
+		u64 sum = 0, scaled;
+
+		for (c = 0; c < nr_cpus; c++)
+			sum += perf_counts(counter->counts, c, t)->val;
+
+		/* NOTE(review): last argument is 0 for every thread - confirm intended. */
+		scaled = sum * counter->scale;
+		perf_stat__update_shadow_stats(counter, &scaled, 0);
+	}
+	return 0;
+}
+
int perf_stat_process_counter(struct perf_stat_config *config,
struct perf_evsel *counter)
{
@@ -342,6 +362,9 @@ int perf_stat_process_counter(struct perf_stat_config *config,
if (ret)
return ret;
+ if (config->aggr_mode == AGGR_THREAD)
+ return process_aggr_thread_counter(counter);
+
if (config->aggr_mode != AGGR_GLOBAL)
return 0;
--
2.7.4