[PATCH V3 5/5] perf,tools: Show freq/CPU%/CORE_BUSY% in perf report --stdio

From: kan . liang
Date: Tue Jul 28 2015 - 14:45:51 EST


From: Kan Liang <kan.liang@xxxxxxxxx>

Show frequency, CPU Utilization and percent performance for each symbol
in perf report by --stdio --show-freq-perf

In sampling group, only group leader do sampling. So only need to print
group leader's freq in --group.

Here is an example.

$ perf report --stdio --group --show-freq-perf

Overhead FREQ MHz CPU% CORE_BUSY%
Command Shared Object Symbol
........................................ ......... ..... ..........
........... ................ ......................

99.54% 99.54% 99.53% 99.53% 99.53% 2301 96 99
tchain_edit tchain_edit [.] f3
0.20% 0.20% 0.20% 0.20% 0.20% 2301 98 99
tchain_edit tchain_edit [.] f2
0.05% 0.05% 0.05% 0.05% 0.05% 2300 98 99
tchain_edit [kernel.vmlinux] [k] read_tsc

Signed-off-by: Kan Liang <kan.liang@xxxxxxxxx>
---
tools/perf/Documentation/perf-report.txt | 12 ++++++
tools/perf/builtin-report.c | 22 +++++++++-
tools/perf/ui/hist.c | 71 +++++++++++++++++++++++++++++---
tools/perf/util/hist.h | 3 ++
tools/perf/util/sort.c | 3 ++
tools/perf/util/symbol.h | 12 +++++-
6 files changed, 116 insertions(+), 7 deletions(-)

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index c33b69f..faa8825 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -303,6 +303,18 @@ OPTIONS
special event -e cpu/mem-loads/ or -e cpu/mem-stores/. See
'perf mem' for simpler access.

+--show-freq-perf::
+ Show CPU frequency and performance result from sample read.
+ To generate the frequency and performance output, the perf.data file
+ must have been obtained by group read and using special events cycles,
+ ref-cycles, msr/tsc/, msr/aperf/ or msr/mperf/
+ Freq MHz: The frequency during the sample interval. Needs cycles and
+ ref-cycles event.
+ CPU%: CPU utilization during the sample interval. Needs ref-cycles and
+ msr/tsc/ events.
+ CORE_BUSY%: actual percent performance (APERF/MPERF%) during the
+ sample interval. Needs msr/aperf/ and msr/mperf/ events.
+
--percent-limit::
Do not show entries which have an overhead under that percent.
(Default: 0).
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 3810251..cabbd6b5 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -132,7 +132,8 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter,
struct branch_info *bi;

if ((iter->ops == &hist_iter_normal) &&
- perf_evsel__is_group_leader(evsel))
+ perf_evsel__is_group_leader(evsel) &&
+ symbol_conf.show_freq_perf)
set_he_freq_perf(rep->session, iter);

if (!ui__has_annotation())
@@ -757,6 +758,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel,
"Enable kernel symbol demangling"),
OPT_BOOLEAN(0, "mem-mode", &report.mem_mode, "mem access profile"),
+ OPT_BOOLEAN(0, "show-freq-perf", &symbol_conf.show_freq_perf,
+ "show CPU freqency and performance info"),
OPT_CALLBACK(0, "percent-limit", &report, "percent",
"Don't show entries under that percent", parse_percent_limit),
OPT_CALLBACK(0, "percentage", NULL, "relative|absolute",
@@ -769,7 +772,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
struct perf_data_file file = {
.mode = PERF_DATA_MODE_READ,
};
+ struct perf_evsel *pos;
int ret = hists__init();
+ bool freq_perf_info[FREQ_PERF_MAX] = {0};

if (ret < 0)
return ret;
@@ -854,6 +859,21 @@ repeat:
symbol_conf.cumulate_callchain = false;
}

+
+ if (symbol_conf.show_freq_perf) {
+ memset(symbol_conf.freq_perf_type, false, sizeof(bool) * DISPLAY_MAX);
+ evlist__for_each(session->evlist, pos) {
+ SET_FREQ_PERF_VALUE(session->header.env.msr_pmu_type,
+ pos, freq_perf_info, true);
+ }
+ if (HAS_FREQ(freq_perf_info))
+ symbol_conf.freq_perf_type[DISPLAY_FREQ] = true;
+ if (HAS_CPU_UTIL(freq_perf_info))
+ symbol_conf.freq_perf_type[DISPLAY_CPU_UTIL] = true;
+ if (HAS_CORE_BUSY(freq_perf_info))
+ symbol_conf.freq_perf_type[DISPLAY_CORE_BUSY] = true;
+ }
+
if (setup_sorting() < 0) {
if (sort_order)
parse_options_usage(report_usage, options, "s", 1);
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 25d6083..ad49c24 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -17,7 +17,7 @@

static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
hpp_field_fn get_field, const char *fmt, int len,
- hpp_snprint_fn print_fn, bool fmt_percent)
+ hpp_snprint_fn print_fn, bool fmt_percent, bool single)
{
int ret;
struct hists *hists = he->hists;
@@ -36,7 +36,7 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
} else
ret = hpp__call_print_fn(hpp, print_fn, fmt, len, get_field(he));

- if (perf_evsel__is_group_event(evsel)) {
+ if (perf_evsel__is_group_event(evsel) && !single) {
int prev_idx, idx_delta;
struct hist_entry *pair;
int nr_members = evsel->nr_members;
@@ -109,10 +109,16 @@ int hpp__fmt(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
const char *fmtstr, hpp_snprint_fn print_fn, bool fmt_percent)
{
int len = fmt->user_len ?: fmt->len;
+ bool single = false;
+
+ if (((fmt == &perf_hpp__format[PERF_HPP__FREQ]) ||
+ (fmt == &perf_hpp__format[PERF_HPP__CPU_UTIL]) ||
+ (fmt == &perf_hpp__format[PERF_HPP__CORE_BUSY])))
+ single = true;

if (symbol_conf.field_sep) {
return __hpp__fmt(hpp, he, get_field, fmtstr, 1,
- print_fn, fmt_percent);
+ print_fn, fmt_percent, single);
}

if (fmt_percent)
@@ -120,7 +126,7 @@ int hpp__fmt(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
else
len -= 1;

- return __hpp__fmt(hpp, he, get_field, fmtstr, len, print_fn, fmt_percent);
+ return __hpp__fmt(hpp, he, get_field, fmtstr, len, print_fn, fmt_percent, single);
}

int hpp__fmt_acc(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
@@ -234,6 +240,30 @@ static int hpp__header_fn(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
return scnprintf(hpp->buf, hpp->size, "%*s", len, fmt->name);
}

+static int hpp__single_width_fn(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp __maybe_unused,
+ struct perf_evsel *evsel)
+{
+ int len = fmt->user_len ?: fmt->len;
+
+ if (symbol_conf.event_group && !symbol_conf.show_freq_perf)
+ len = max(len, evsel->nr_members * fmt->len);
+
+ if (len < (int)strlen(fmt->name))
+ len = strlen(fmt->name);
+
+ return len;
+}
+
+static int hpp__single_header_fn(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct perf_evsel *evsel)
+{
+ int len = hpp__single_width_fn(fmt, hpp, evsel);
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", len, fmt->name);
+}
+
+
static int hpp_color_scnprintf(struct perf_hpp *hpp, const char *fmt, ...)
{
va_list args;
@@ -363,6 +393,9 @@ HPP_PERCENT_ACC_FNS(overhead_acc, period)

HPP_RAW_FNS(samples, nr_events)
HPP_RAW_FNS(period, period)
+HPP_RAW_FNS(freq, freq)
+HPP_RAW_FNS(cpu_util, cpu_util)
+HPP_RAW_FNS(core_busy, core_busy)

static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
struct hist_entry *a __maybe_unused,
@@ -395,6 +428,17 @@ static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
.sort = hpp__sort_ ## _fn, \
}

+#define HPP__SINGLE_PRINT_FNS(_name, _fn) \
+ { \
+ .name = _name, \
+ .header = hpp__single_header_fn, \
+ .width = hpp__single_width_fn, \
+ .entry = hpp__entry_ ## _fn, \
+ .cmp = hpp__nop_cmp, \
+ .collapse = hpp__nop_cmp, \
+ .sort = hpp__sort_ ## _fn, \
+ }
+
#define HPP__PRINT_FNS(_name, _fn) \
{ \
.name = _name, \
@@ -414,7 +458,10 @@ struct perf_hpp_fmt perf_hpp__format[] = {
HPP__COLOR_PRINT_FNS("guest usr", overhead_guest_us),
HPP__COLOR_ACC_PRINT_FNS("Children", overhead_acc),
HPP__PRINT_FNS("Samples", samples),
- HPP__PRINT_FNS("Period", period)
+ HPP__PRINT_FNS("Period", period),
+ HPP__SINGLE_PRINT_FNS("FREQ MHz", freq),
+ HPP__SINGLE_PRINT_FNS("CPU%", cpu_util),
+ HPP__SINGLE_PRINT_FNS("CORE_BUSY%", core_busy)
};

LIST_HEAD(perf_hpp__list);
@@ -485,6 +532,15 @@ void perf_hpp__init(void)
if (symbol_conf.show_total_period)
perf_hpp__column_enable(PERF_HPP__PERIOD);

+ if (symbol_conf.show_freq_perf) {
+ if (symbol_conf.freq_perf_type[DISPLAY_FREQ])
+ perf_hpp__column_enable(PERF_HPP__FREQ);
+ if (symbol_conf.freq_perf_type[DISPLAY_CPU_UTIL])
+ perf_hpp__column_enable(PERF_HPP__CPU_UTIL);
+ if (symbol_conf.freq_perf_type[DISPLAY_CORE_BUSY])
+ perf_hpp__column_enable(PERF_HPP__CORE_BUSY);
+ }
+
/* prepend overhead field for backward compatiblity. */
list = &perf_hpp__format[PERF_HPP__OVERHEAD].sort_list;
if (list_empty(list))
@@ -652,6 +708,9 @@ void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists)
return;

switch (idx) {
+ case PERF_HPP__CPU_UTIL:
+ fmt->len = 5;
+ break;
case PERF_HPP__OVERHEAD:
case PERF_HPP__OVERHEAD_SYS:
case PERF_HPP__OVERHEAD_US:
@@ -661,6 +720,8 @@ void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists)

case PERF_HPP__OVERHEAD_GUEST_SYS:
case PERF_HPP__OVERHEAD_GUEST_US:
+ case PERF_HPP__FREQ:
+ case PERF_HPP__CORE_BUSY:
fmt->len = 9;
break;

diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 5ed8d9c..9b50e07 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -235,6 +235,9 @@ enum {
PERF_HPP__OVERHEAD_ACC,
PERF_HPP__SAMPLES,
PERF_HPP__PERIOD,
+ PERF_HPP__FREQ,
+ PERF_HPP__CPU_UTIL,
+ PERF_HPP__CORE_BUSY,

PERF_HPP__MAX_INDEX
};
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 4c65a14..e75c68a 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -1225,6 +1225,9 @@ static struct hpp_dimension hpp_sort_dimensions[] = {
DIM(PERF_HPP__OVERHEAD_ACC, "overhead_children"),
DIM(PERF_HPP__SAMPLES, "sample"),
DIM(PERF_HPP__PERIOD, "period"),
+ DIM(PERF_HPP__FREQ, "freq"),
+ DIM(PERF_HPP__CPU_UTIL, "cpu_u"),
+ DIM(PERF_HPP__CORE_BUSY, "core_busy"),
};

#undef DIM
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index b98ce51..467ecc4 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -80,6 +80,14 @@ static inline size_t symbol__size(const struct symbol *sym)
struct strlist;
struct intlist;

+enum freq_perf_type_index {
+ DISPLAY_FREQ = 0,
+ DISPLAY_CPU_UTIL,
+ DISPLAY_CORE_BUSY,
+
+ DISPLAY_MAX
+};
+
struct symbol_conf {
unsigned short priv_size;
unsigned short nr_events;
@@ -106,7 +114,9 @@ struct symbol_conf {
filter_relative,
show_hist_headers,
branch_callstack,
- has_filter;
+ has_filter,
+ show_freq_perf,
+ freq_perf_type[DISPLAY_MAX];
const char *vmlinux_name,
*kallsyms_name,
*source_prefix,
--
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/