[PATCH v2 3/5] perf report: Sort by sampled cycles percent per block for stdio

From: Jin Yao
Date: Tue Oct 15 2019 - 01:35:26 EST


It would be useful to support sorting for all blocks by the
sampled cycles percent per block. This is useful to concentrate
on the globally hottest blocks.

This patch implements a new sort option "total_cycles" which sorts
all blocks by 'Sampled Cycles%'. The 'Sampled Cycles%' is the
percent that is,
block sampled cycles aggregation / total sampled cycles

Note that, this patch only supports "--stdio" mode.

For example,

perf record -b ./div
perf report -s total_cycles --stdio

# To display the perf.data header info, please use --header/--header-only options.
#
#
# Total Lost Samples: 0
#
# Samples: 2M of event 'cycles'
# Event count (approx.): 2753248
#
# Sampled Cycles% Sampled Cycles Avg Cycles% Avg Cycles [Program Block Range] Shared Object
# ............... .............. ........... .......... ................................................................. ....................
#
26.04% 2.8M 0.40% 18 [div.c:42 -> div.c:39] div
15.17% 1.2M 0.16% 7 [random_r.c:357 -> random_r.c:380] libc-2.27.so
5.11% 402.0K 0.04% 2 [div.c:27 -> div.c:28] div
4.87% 381.6K 0.04% 2 [random.c:288 -> random.c:291] libc-2.27.so
4.53% 381.0K 0.04% 2 [div.c:40 -> div.c:40] div
3.85% 300.9K 0.02% 1 [div.c:22 -> div.c:25] div
3.08% 241.1K 0.02% 1 [rand.c:26 -> rand.c:27] libc-2.27.so
3.06% 240.0K 0.02% 1 [random.c:291 -> random.c:291] libc-2.27.so
2.78% 215.7K 0.02% 1 [random.c:298 -> random.c:298] libc-2.27.so
2.52% 198.3K 0.02% 1 [random.c:293 -> random.c:293] libc-2.27.so
2.36% 184.8K 0.02% 1 [rand.c:28 -> rand.c:28] libc-2.27.so
2.33% 180.5K 0.02% 1 [random.c:295 -> random.c:295] libc-2.27.so
2.28% 176.7K 0.02% 1 [random.c:295 -> random.c:295] libc-2.27.so
2.20% 168.8K 0.02% 1 [rand@plt+0 -> rand@plt+0] div
1.98% 158.2K 0.02% 1 [random_r.c:388 -> random_r.c:388] libc-2.27.so
1.57% 123.3K 0.02% 1 [div.c:42 -> div.c:44] div
1.44% 116.0K 0.42% 19 [random_r.c:357 -> random_r.c:394] libc-2.27.so
0.25% 182.5K 0.02% 1 [random_r.c:388 -> random_r.c:391] libc-2.27.so
0.00% 48 1.07% 48 [x86_pmu_enable+284 -> x86_pmu_enable+298] [kernel.kallsyms]
0.00% 74 1.64% 74 [vm_mmap_pgoff+0 -> vm_mmap_pgoff+92] [kernel.kallsyms]
0.00% 73 1.62% 73 [vm_mmap+0 -> vm_mmap+48] [kernel.kallsyms]
0.00% 63 0.69% 31 [up_write+0 -> up_write+34] [kernel.kallsyms]
0.00% 13 0.29% 13 [setup_arg_pages+396 -> setup_arg_pages+413] [kernel.kallsyms]
0.00% 3 0.07% 3 [setup_arg_pages+418 -> setup_arg_pages+450] [kernel.kallsyms]
0.00% 616 6.84% 308 [security_mmap_file+0 -> security_mmap_file+72] [kernel.kallsyms]
0.00% 23 0.51% 23 [security_mmap_file+77 -> security_mmap_file+87] [kernel.kallsyms]
0.00% 4 0.02% 1 [sched_clock+0 -> sched_clock+4] [kernel.kallsyms]
0.00% 4 0.02% 1 [sched_clock+9 -> sched_clock+12] [kernel.kallsyms]
0.00% 1 0.02% 1 [rcu_nmi_exit+0 -> rcu_nmi_exit+9] [kernel.kallsyms]
0.00% 1 0.02% 1 [rcu_nmi_exit+14 -> rcu_nmi_exit+15] [kernel.kallsyms]
0.00% 5 0.11% 5 [rcu_irq_exit+0 -> rcu_irq_exit+79] [kernel.kallsyms]
0.00% 2 0.04% 2 [printk_nmi_exit+0 -> printk_nmi_exit+16] [kernel.kallsyms]
0.00% 3 0.07% 3 [perf_sample_event_took+0 -> perf_sample_event_took+56] [kernel.kallsyms]
0.00% 1 0.02% 1 [perf_sample_event_took+184 -> perf_sample_event_took+185] [kernel.kallsyms]
0.00% 72 1.60% 72 [perf_iterate_ctx+0 -> perf_iterate_ctx+50] [kernel.kallsyms]
0.00% 1 0.02% 1 [perf_event_nmi_handler+50 -> perf_event_nmi_handler+52] [kernel.kallsyms]
0.00% 1 0.02% 1 [perf_event_nmi_handler+57 -> perf_event_nmi_handler+63] [kernel.kallsyms]
0.00% 1 0.02% 1 [perf_event_nmi_handler+68 -> perf_event_nmi_handler+74] [kernel.kallsyms]

Signed-off-by: Jin Yao <yao.jin@xxxxxxxxxxxxxxx>
---
tools/perf/Documentation/perf-report.txt | 10 +
tools/perf/builtin-report.c | 423 ++++++++++++++++++++++-
tools/perf/ui/stdio/hist.c | 22 ++
tools/perf/util/block.h | 1 +
tools/perf/util/hist.c | 4 +
tools/perf/util/sort.c | 5 +
tools/perf/util/sort.h | 1 +
tools/perf/util/symbol_conf.h | 1 +
8 files changed, 463 insertions(+), 4 deletions(-)

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 7315f155803f..b1f9c93a91fd 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -124,6 +124,7 @@ OPTIONS
- in_tx: branch in TSX transaction
- abort: TSX transaction abort.
- cycles: Cycles in basic block
+ - total_cycles: sort all blocks by the 'Sampled Cycles%'.

And default sort keys are changed to comm, dso_from, symbol_from, dso_to
and symbol_to, see '--branch-stack'.
@@ -136,6 +137,15 @@ OPTIONS
executed, such as a memory access bottleneck. If a function has high overhead
and low IPC, it's worth further analyzing it to optimize its performance.

+ When the total_cycles is specified, it supports sorting for all blocks by the
+ 'Sampled Cycles%'. This is useful to concentrate on the globally slowest blocks.
+ In output, there are some new columns:
+ 'Sampled Cycles%' - block sampled cycles aggregation / total sampled cycles
+ 'Sampled Cycles' - block sampled cycles aggregation
+ 'Avg Cycles%' - block average sampled cycles / sum of total block average
+ sampled cycles
+ 'Avg Cycles' - block average sampled cycles
+
If the --mem-mode option is used, the following sort keys are also available
(incompatible with --branch-stack):
symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline.
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index cdb436d6e11f..f04a7b225a81 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -51,6 +51,7 @@
#include "util/util.h" // perf_tip()
#include "ui/ui.h"
#include "ui/progress.h"
+#include "util/block.h"

#include <dlfcn.h>
#include <errno.h>
@@ -96,10 +97,64 @@ struct report {
float min_percent;
u64 nr_entries;
u64 queue_size;
+ u64 cycles_count;
+ u64 block_cycles;
int socket_filter;
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
struct branch_type_stat brtype_stat;
bool symbol_ipc;
+ bool total_cycles;
+ struct block_hist block_hist;
+};
+
+struct block_fmt {
+ struct perf_hpp_fmt fmt;
+ int idx;
+ int width;
+ const char *header;
+ struct report *rep;
+};
+
+enum {
+ PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_COV,
+ PERF_HPP_REPORT__BLOCK_LBR_CYCLES,
+ PERF_HPP_REPORT__BLOCK_CYCLES_PCT,
+ PERF_HPP_REPORT__BLOCK_AVG_CYCLES,
+ PERF_HPP_REPORT__BLOCK_RANGE,
+ PERF_HPP_REPORT__BLOCK_DSO,
+ PERF_HPP_REPORT__BLOCK_MAX_INDEX
+};
+
+static struct block_fmt block_fmts[PERF_HPP_REPORT__BLOCK_MAX_INDEX];
+
+static struct block_header_column{
+ const char *name;
+ int width;
+} block_columns[PERF_HPP_REPORT__BLOCK_MAX_INDEX] = {
+ [PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_COV] = {
+ .name = "Sampled Cycles%",
+ .width = 15,
+ },
+ [PERF_HPP_REPORT__BLOCK_LBR_CYCLES] = {
+ .name = "Sampled Cycles",
+ .width = 14,
+ },
+ [PERF_HPP_REPORT__BLOCK_CYCLES_PCT] = {
+ .name = "Avg Cycles%",
+ .width = 11,
+ },
+ [PERF_HPP_REPORT__BLOCK_AVG_CYCLES] = {
+ .name = "Avg Cycles",
+ .width = 10,
+ },
+ [PERF_HPP_REPORT__BLOCK_RANGE] = {
+ .name = "[Program Block Range]",
+ .width = 70,
+ },
+ [PERF_HPP_REPORT__BLOCK_DSO] = {
+ .name = "Shared Object",
+ .width = 20,
+ }
};

static int report__config(const char *var, const char *value, void *cb)
@@ -277,7 +332,8 @@ static int process_sample_event(struct perf_tool *tool,
if (!sample->branch_stack)
goto out_put;

- iter.add_entry_cb = hist_iter__branch_callback;
+ if (!rep->total_cycles)
+ iter.add_entry_cb = hist_iter__branch_callback;
iter.ops = &hist_iter_branch;
} else if (rep->mem_mode) {
iter.ops = &hist_iter_mem;
@@ -290,9 +346,10 @@ static int process_sample_event(struct perf_tool *tool,
if (al.map != NULL)
al.map->dso->hit = 1;

- if (ui__has_annotation() || rep->symbol_ipc) {
+ if (ui__has_annotation() || rep->symbol_ipc || rep->total_cycles) {
hist__account_cycles(sample->branch_stack, &al, sample,
- rep->nonany_branch_mode, NULL);
+ rep->nonany_branch_mode,
+ &rep->cycles_count);
}

ret = hist_entry_iter__add(&iter, &al, rep->max_stack, rep);
@@ -480,6 +537,349 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report
return ret + fprintf(fp, "\n#\n");
}

+static int block_column_header(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct perf_hpp *hpp __maybe_unused,
+ struct hists *hists __maybe_unused,
+ int line __maybe_unused,
+ int *span __maybe_unused)
+{
+ struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width,
+ block_fmt->header);
+}
+
+static int block_column_width(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct perf_hpp *hpp __maybe_unused,
+ struct hists *hists __maybe_unused)
+{
+ struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+
+ return block_fmt->width;
+}
+
+static int block_cycles_cov_entry(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp, struct hist_entry *he)
+{
+ struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+ struct report *rep = block_fmt->rep;
+ struct block_info *bi = he->block_info;
+ double ratio = 0.0;
+ char buf[16];
+
+ if (rep->cycles_count)
+ ratio = (double)bi->cycles / (double)rep->cycles_count;
+
+ sprintf(buf, "%.2f%%", 100.0 * ratio);
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf);
+}
+
+static int64_t block_cycles_cov_sort(struct perf_hpp_fmt *fmt,
+ struct hist_entry *left,
+ struct hist_entry *right)
+{
+ struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+ struct report *rep = block_fmt->rep;
+ struct block_info *bi_l = left->block_info;
+ struct block_info *bi_r = right->block_info;
+ double l, r;
+
+ if (rep->cycles_count) {
+ l = ((double)bi_l->cycles / (double)rep->cycles_count) * 1000.0;
+ r = ((double)bi_r->cycles / (double)rep->cycles_count) * 1000.0;
+ return (int64_t)l - (int64_t)r;
+ }
+
+ return 0;
+}
+
+static void cycles_string(u64 cycles, char *buf, int size)
+{
+ if (cycles >= 1000000)
+ scnprintf(buf, size, "%.1fM", (double)cycles / 1000000.0);
+ else if (cycles >= 1000)
+ scnprintf(buf, size, "%.1fK", (double)cycles / 1000.0);
+ else
+ scnprintf(buf, size, "%1d", cycles);
+}
+
+static int block_cycles_lbr_entry(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp, struct hist_entry *he)
+{
+ struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+ struct block_info *bi = he->block_info;
+ char cycles_buf[16];
+
+ cycles_string(bi->cycles_aggr, cycles_buf, sizeof(cycles_buf));
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width,
+ cycles_buf);
+}
+
+static int block_cycles_pct_entry(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp, struct hist_entry *he)
+{
+ struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+ struct report *rep = block_fmt->rep;
+ struct block_info *bi = he->block_info;
+ double ratio = 0.0;
+ u64 avg;
+ char buf[16];
+
+ if (rep->block_cycles && bi->num_aggr) {
+ avg = bi->cycles_aggr / bi->num_aggr;
+ ratio = (double)avg / (double)rep->block_cycles;
+ }
+
+ sprintf(buf, "%.2f%%", 100.0 * ratio);
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf);
+}
+
+static int block_avg_cycles_entry(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+ struct block_info *bi = he->block_info;
+ char cycles_buf[16];
+
+ cycles_string(bi->cycles_aggr / bi->num_aggr, cycles_buf,
+ sizeof(cycles_buf));
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width,
+ cycles_buf);
+}
+
+static int block_range_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+ struct block_info *bi = he->block_info;
+ char buf[128];
+ char *start_line, *end_line;
+
+ symbol_conf.disable_add2line_warn = true;
+
+ start_line = map__srcline(he->ms.map, bi->sym->start + bi->start,
+ he->ms.sym);
+
+ end_line = map__srcline(he->ms.map, bi->sym->start + bi->end,
+ he->ms.sym);
+
+ if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) {
+ scnprintf(buf, sizeof(buf), "[%s -> %s]",
+ start_line, end_line);
+ } else {
+ scnprintf(buf, sizeof(buf), "[%7lx -> %7lx]",
+ bi->start, bi->end);
+ }
+
+ free_srcline(start_line);
+ free_srcline(end_line);
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf);
+}
+
+static int block_dso_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+ struct map *map = he->ms.map;
+
+ if (map && map->dso) {
+ return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width,
+ map->dso->short_name);
+ }
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width,
+ "[unknown]");
+}
+
+static void init_block_header(struct block_fmt *block_fmt)
+{
+ struct perf_hpp_fmt *fmt = &block_fmt->fmt;
+
+ BUG_ON(block_fmt->idx >= PERF_HPP_REPORT__BLOCK_MAX_INDEX);
+
+ block_fmt->header = block_columns[block_fmt->idx].name;
+ block_fmt->width = block_columns[block_fmt->idx].width;
+
+ fmt->header = block_column_header;
+ fmt->width = block_column_width;
+}
+
+static void block_hpp_register(struct block_fmt *block_fmt, int idx,
+ struct perf_hpp_list *hpp_list,
+ struct report *rep)
+{
+ struct perf_hpp_fmt *fmt = &block_fmt->fmt;
+
+ block_fmt->rep = rep;
+ block_fmt->idx = idx;
+ INIT_LIST_HEAD(&fmt->list);
+ INIT_LIST_HEAD(&fmt->sort_list);
+
+ switch (idx) {
+ case PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_COV:
+ fmt->entry = block_cycles_cov_entry;
+ fmt->cmp = block_info__cmp;
+ fmt->sort = block_cycles_cov_sort;
+ break;
+ case PERF_HPP_REPORT__BLOCK_LBR_CYCLES:
+ fmt->entry = block_cycles_lbr_entry;
+ break;
+ case PERF_HPP_REPORT__BLOCK_CYCLES_PCT:
+ fmt->entry = block_cycles_pct_entry;
+ break;
+ case PERF_HPP_REPORT__BLOCK_AVG_CYCLES:
+ fmt->entry = block_avg_cycles_entry;
+ break;
+ case PERF_HPP_REPORT__BLOCK_RANGE:
+ fmt->entry = block_range_entry;
+ break;
+ case PERF_HPP_REPORT__BLOCK_DSO:
+ fmt->entry = block_dso_entry;
+ break;
+ default:
+ return;
+ }
+
+ init_block_header(block_fmt);
+ perf_hpp_list__column_register(hpp_list, fmt);
+}
+
+static void register_block_columns(struct perf_hpp_list *hpp_list,
+ struct report *rep)
+{
+ for (int i = 0; i < PERF_HPP_REPORT__BLOCK_MAX_INDEX; i++)
+ block_hpp_register(&block_fmts[i], i, hpp_list, rep);
+}
+
+static void init_block_hist(struct block_hist *bh, struct report *rep)
+{
+ __hists__init(&bh->block_hists, &bh->block_list);
+ perf_hpp_list__init(&bh->block_list);
+ bh->block_list.nr_header_lines = 1;
+
+ register_block_columns(&bh->block_list, rep);
+
+ perf_hpp_list__register_sort_field(&bh->block_list,
+ &block_fmts[PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_COV].fmt);
+}
+
+static void init_block_info(struct block_info *bi, struct symbol *sym,
+ struct cyc_hist *ch, int offset,
+ u64 total_cycles)
+{
+ bi->sym = sym;
+ bi->start = ch->start;
+ bi->end = offset;
+ bi->cycles = ch->cycles;
+ bi->cycles_aggr = ch->cycles_aggr;
+ bi->num = ch->num;
+ bi->num_aggr = ch->num_aggr;
+ bi->total_cycles = total_cycles;
+}
+
+static int add_block_per_sym(struct hist_entry *he, struct block_hist *bh,
+ u64 *block_cycles, u64 total_cycles)
+{
+ struct annotation *notes;
+ struct cyc_hist *ch;
+ static struct addr_location al;
+ u64 cycles = 0;
+
+ if (!he->ms.map || !he->ms.sym)
+ return 0;
+
+ memset(&al, 0, sizeof(al));
+ al.map = he->ms.map;
+ al.sym = he->ms.sym;
+
+ notes = symbol__annotation(he->ms.sym);
+ if (!notes || !notes->src || !notes->src->cycles_hist)
+ return 0;
+ ch = notes->src->cycles_hist;
+ for (unsigned int i = 0; i < symbol__size(he->ms.sym); i++) {
+ if (ch[i].num_aggr) {
+ struct block_info *bi;
+ struct hist_entry *he_block;
+
+ bi = block_info__new();
+ if (!bi)
+ return -1;
+
+ init_block_info(bi, he->ms.sym, &ch[i], i,
+ total_cycles);
+ cycles += bi->cycles_aggr / bi->num_aggr;
+
+ he_block = hists__add_entry_block(&bh->block_hists,
+ &al, bi);
+ if (!he_block) {
+ block_info__put(bi);
+ return -1;
+ }
+ }
+ }
+
+ if (block_cycles)
+ *block_cycles += cycles;
+
+ return 0;
+}
+
+static int resort_cb(struct hist_entry *he, void *arg __maybe_unused)
+{
+ /* Skip the calculation of column length in output_resort */
+ he->filtered = true;
+ return 0;
+}
+
+static void hists__clear_filtered(struct hists *hists)
+{
+ struct rb_node *next = rb_first_cached(&hists->entries);
+ struct hist_entry *he;
+
+ while (next) {
+ he = rb_entry(next, struct hist_entry, rb_node);
+ he->filtered = false;
+ next = rb_next(&he->rb_node);
+ }
+}
+
+static void get_block_hists(struct hists *hists, struct block_hist *bh,
+ struct report *rep)
+{
+ struct rb_node *next = rb_first_cached(&hists->entries);
+ struct hist_entry *he;
+
+ init_block_hist(bh, rep);
+
+ while (next) {
+ he = rb_entry(next, struct hist_entry, rb_node);
+ add_block_per_sym(he, bh, &rep->block_cycles,
+ rep->cycles_count);
+ next = rb_next(&he->rb_node);
+ }
+
+ hists__output_resort_cb(&bh->block_hists, NULL, resort_cb);
+ hists__clear_filtered(&bh->block_hists);
+}
+
+static int hists__fprintf_all_blocks(struct hists *hists, struct report *rep)
+{
+ struct block_hist *bh = &rep->block_hist;
+
+ get_block_hists(hists, bh, rep);
+ symbol_conf.report_individual_block = true;
+ hists__fprintf(&bh->block_hists, true, 0, 0, 0,
+ stdout, true);
+ hists__delete_entries(&bh->block_hists);
+ return 0;
+}
+
static int perf_evlist__tty_browse_hists(struct evlist *evlist,
struct report *rep,
const char *help)
@@ -500,6 +900,12 @@ static int perf_evlist__tty_browse_hists(struct evlist *evlist,
continue;

hists__fprintf_nr_sample_events(hists, rep, evname, stdout);
+
+ if (rep->total_cycles) {
+ hists__fprintf_all_blocks(hists, rep);
+ continue;
+ }
+
hists__fprintf(hists, !quiet, 0, 0, rep->min_percent, stdout,
!(symbol_conf.use_callchain ||
symbol_conf.show_branchflag_count));
@@ -1373,6 +1779,15 @@ int cmd_report(int argc, const char **argv)
goto error;
}

+ if (sort_order && strstr(sort_order, "total_cycles") &&
+ (sort__mode == SORT_MODE__BRANCH)) {
+ report.total_cycles = true;
+ if (!report.use_stdio) {
+ pr_err("Error: -s total_cycles can be only used together with --stdio\n");
+ goto error;
+ }
+ }
+
if (strcmp(input_name, "-") != 0)
setup_browser(true);
else
@@ -1423,7 +1838,7 @@ int cmd_report(int argc, const char **argv)
* so don't allocate extra space that won't be used in the stdio
* implementation.
*/
- if (ui__has_annotation() || report.symbol_ipc) {
+ if (ui__has_annotation() || report.symbol_ipc || report.total_cycles) {
ret = symbol__annotation_init();
if (ret < 0)
goto error;
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index 5365606e9dad..655ef7708cd0 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -558,6 +558,25 @@ static int hist_entry__block_fprintf(struct hist_entry *he,
return ret;
}

+static int hist_entry__individual_block_fprintf(struct hist_entry *he,
+ char *bf, size_t size,
+ FILE *fp)
+{
+ int ret = 0;
+
+ struct perf_hpp hpp = {
+ .buf = bf,
+ .size = size,
+ .skip = false,
+ };
+
+ hist_entry__snprintf(he, &hpp);
+ if (!hpp.skip)
+ ret += fprintf(fp, "%s\n", bf);
+
+ return ret;
+}
+
static int hist_entry__fprintf(struct hist_entry *he, size_t size,
char *bf, size_t bfsz, FILE *fp,
bool ignore_callchains)
@@ -580,6 +599,9 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size,
if (symbol_conf.report_block)
return hist_entry__block_fprintf(he, bf, size, fp);

+ if (symbol_conf.report_individual_block)
+ return hist_entry__individual_block_fprintf(he, bf, size, fp);
+
hist_entry__snprintf(he, &hpp);

ret = fprintf(fp, "%s\n", bf);
diff --git a/tools/perf/util/block.h b/tools/perf/util/block.h
index b6730204d0b9..bdd21354d26e 100644
--- a/tools/perf/util/block.h
+++ b/tools/perf/util/block.h
@@ -13,6 +13,7 @@ struct block_info {
u64 end;
u64 cycles;
u64 cycles_aggr;
+ u64 total_cycles;
s64 cycles_spark[NUM_SPARKS];
int num;
int num_aggr;
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index af65ce950ba2..521f7185a94f 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -756,6 +756,10 @@ struct hist_entry *hists__add_entry_block(struct hists *hists,
struct hist_entry entry = {
.block_info = block_info,
.hists = hists,
+ .ms = {
+ .map = al->map,
+ .sym = al->sym,
+ },
}, *he = hists__findnew_entry(hists, &entry, al, false);

return he;
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 43d1d410854a..eb286700a8a9 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -492,6 +492,10 @@ struct sort_entry sort_sym_ipc_null = {
.se_width_idx = HISTC_SYMBOL_IPC,
};

+struct sort_entry sort_block_cycles = {
+ .se_cmp = sort__sym_cmp,
+};
+
/* --sort srcfile */

static char no_srcfile[1];
@@ -1695,6 +1699,7 @@ static struct sort_dimension bstack_sort_dimensions[] = {
DIM(SORT_SRCLINE_FROM, "srcline_from", sort_srcline_from),
DIM(SORT_SRCLINE_TO, "srcline_to", sort_srcline_to),
DIM(SORT_SYM_IPC, "ipc_lbr", sort_sym_ipc),
+ DIM(SORT_BLOCK_CYCLES, "total_cycles", sort_block_cycles),
};

#undef DIM
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 5aff9542d9b7..2ede6c70ad56 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -239,6 +239,7 @@ enum sort_type {
SORT_SRCLINE_FROM,
SORT_SRCLINE_TO,
SORT_SYM_IPC,
+ SORT_BLOCK_CYCLES,

/* memory mode specific sort keys */
__SORT_MEMORY_MODE,
diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h
index e6880789864c..10f1ec3e0349 100644
--- a/tools/perf/util/symbol_conf.h
+++ b/tools/perf/util/symbol_conf.h
@@ -40,6 +40,7 @@ struct symbol_conf {
raw_trace,
report_hierarchy,
report_block,
+ report_individual_block,
inline_name,
disable_add2line_warn;
const char *vmlinux_name,
--
2.17.1