[PATCH 03/10] perf, tools, report: Add infrastructure for a cycles histogram

From: Andi Kleen
Date: Sun May 10 2015 - 09:54:35 EST


From: Andi Kleen <ak@xxxxxxxxxxxxxxx>

This adds the basic infrastructure to keep track of cycle counts
per basic block for annotate. We allocate an array similar to the
normal accounting, and then account branch cycles there.

We handle two cases: cycles per basic block, when the start of the
block is known, and cycles per branch. These are later used for
either IPC or just cycles per basic block (BB).

In the known-start case we cannot handle overlapping basic blocks, so
the longest basic block always wins.

For the cycles per branch case everything is accurately accounted.

Signed-off-by: Andi Kleen <ak@xxxxxxxxxxxxxxx>
---
tools/perf/builtin-annotate.c | 1 +
tools/perf/util/annotate.c | 145 ++++++++++++++++++++++++++++++++++++++++--
tools/perf/util/annotate.h | 17 +++++
3 files changed, 157 insertions(+), 6 deletions(-)

diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 71bf745..52e7575 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -181,6 +181,7 @@ find_next:
* symbol, free he->ms.sym->src to signal we already
* processed this symbol.
*/
+ zfree(&notes->src->cycles_hist);
zfree(&notes->src);
}
}
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 7f5bdfc..7701dfb 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -473,17 +473,85 @@ int symbol__alloc_hist(struct symbol *sym)
return 0;
}

+/* Lazily allocate the zeroed cycles histogram; returns 0, or -1 if calloc fails. */
+static int symbol__alloc_hist_cycles(struct symbol *sym)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct cyc_hist *hist;
+
+ hist = calloc(symbol__size(sym), sizeof(struct cyc_hist));
+ notes->src->cycles_hist = hist;
+
+ return hist != NULL ? 0 : -1;
+}
+
void symbol__annotate_zero_histograms(struct symbol *sym)
{
struct annotation *notes = symbol__annotation(sym);

pthread_mutex_lock(&notes->lock);
- if (notes->src != NULL)
+ if (notes->src != NULL) {
memset(notes->src->histograms, 0,
notes->src->nr_histograms * notes->src->sizeof_sym_hist);
+ if (notes->src->cycles_hist) /* lazily allocated, so it may still be NULL */
+ memset(notes->src->cycles_hist, 0,
+ symbol__size(sym) * sizeof(struct cyc_hist)); /* same extent as the calloc in symbol__alloc_hist_cycles() */
+ }
pthread_mutex_unlock(&notes->lock);
}

+static int __symbol__account_cycles(struct symbol *sym,
+ struct annotation *notes,
+ u64 start,
+ unsigned offset, unsigned cycles,
+ unsigned have_start)
+{
+ /*
+ * Record the cycles of the block ending at offset; a zero count is ignored.
+ */
+ if (cycles) {
+ struct cyc_hist *ch;
+
+ if (!notes->src->cycles_hist) { /* allocate the histogram on first use */
+ if (symbol__alloc_hist_cycles(sym) < 0)
+ return -ENOMEM;
+ }
+ ch = notes->src->cycles_hist;
+ if (ch != NULL) {
+ /*
+ * For now we can only account one basic block per
+ * final jump, but several could be overlapping.
+ * Always account the longest one: when a shorter one
+ * has already been seen, throw it away.
+ *
+ * Separately, the *_aggr fields always account the full cycles.
+ */
+ ch[offset].num_aggr++;
+ ch[offset].cycles_aggr += cycles;
+ if (!have_start && ch[offset].have_start) /* keep the entry that has a known start */
+ return 0;
+ if (ch[offset].num) {
+ if (have_start &&
+ (!ch[offset].have_start ||
+ ch[offset].start > start)) { /* first known start, or an earlier one: restart the entry */
+ ch[offset].have_start = 0;
+ ch[offset].cycles = 0;
+ ch[offset].num = 0;
+ if (ch[offset].reset < 0xffff) /* saturate instead of wrapping */
+ ch[offset].reset++;
+ } else if (have_start &&
+ ch[offset].start < start) /* new block starts later (is shorter): ignore it */
+ return 0;
+ }
+ ch[offset].have_start = have_start;
+ ch[offset].start = start;
+ ch[offset].cycles += cycles;
+ ch[offset].num++;
+ }
+ }
+ return 0;
+}
+
static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map,
struct annotation *notes, int evidx, u64 addr)
{
@@ -506,6 +574,17 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map,
return 0;
}

+/* Get sym's annotation, calling symbol__alloc_hist() if ->src is unset. */
+static struct annotation *symbol__get_annotation(struct symbol *sym)
+{
+ struct annotation *notes = symbol__annotation(sym);
+
+ /* NULL tells the caller that symbol__alloc_hist() failed. */
+ if (notes->src == NULL && symbol__alloc_hist(sym) < 0)
+ return NULL;
+ return notes;
+}
+
static int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
int evidx, u64 addr)
{
@@ -513,14 +592,68 @@ static int symbol__inc_addr_samples(struct symbol *sym, struct map *map,

if (sym == NULL)
return 0;
+ notes = symbol__get_annotation(sym);
+ if (notes == NULL)
+ return -ENOMEM;
+ return __symbol__inc_addr_samples(sym, map, notes, evidx, addr);
+}

- notes = symbol__annotation(sym);
- if (notes->src == NULL) {
- if (symbol__alloc_hist(sym) < 0)
- return -ENOMEM;
+static int symbol__account_cycles(u64 addr, u64 start,
+ struct symbol *sym, unsigned cycles)
+{
+ struct annotation *notes;
+ unsigned offset;
+
+ if (sym == NULL) /* no symbol: nothing to account, not an error */
+ return 0;
+ notes = symbol__get_annotation(sym);
+ if (notes == NULL)
+ return -ENOMEM;
+ if (addr < sym->start || addr >= sym->end) /* addr must lie inside sym */
+ return -ERANGE;
+
+ if (start) { /* start == 0 means no block start is known */
+ if (start < sym->start || start >= sym->end)
+ return -ERANGE;
+ if (start >= addr) /* a start at or after addr is treated as unknown */
+ start = 0;
}
+ offset = addr - sym->start; /* index into the per-symbol cycles histogram */
+ return __symbol__account_cycles(sym, notes,
+ start ? start - sym->start : 0,
+ offset, cycles,
+ !!start);
+}

- return __symbol__inc_addr_samples(sym, map, notes, evidx, addr);
+int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
+ struct addr_map_symbol *start,
+ unsigned cycles)
+{
+ u64 saddr = 0; /* 0 = no usable start; u64 matches symbol__account_cycles() */
+ int err;
+
+ /*
+ * Only set start when IPC can be computed. We can only
+ * compute it when the basic block is completely in a single
+ * function.
+ * Special case: the jump is elsewhere, but the block
+ * starts on the function start.
+ */
+ if (start &&
+ (start->sym == ams->sym ||
+ (ams->sym &&
+ start->addr == ams->sym->start + ams->map->start)))
+ saddr = start->al_addr;
+ if (saddr == 0) /* casts keep %llx correct however u64 is defined */
+ pr_debug2("BB with bad start: addr %llx start %llx sym %llx saddr %llx\n",
+ (unsigned long long)ams->addr,
+ (unsigned long long)(start ? start->addr : 0),
+ (unsigned long long)(ams->sym ? ams->sym->start + ams->map->start : 0),
+ (unsigned long long)saddr);
+ err = symbol__account_cycles(ams->al_addr, saddr, ams->sym, cycles);
+ if (err) /* debug output only; the error still goes back to the caller */
+ pr_debug2("account_cycles failed %d\n", err);
+ return err;
}

int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx)
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index cadbdc9..9080181 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -79,6 +79,17 @@ struct sym_hist {
u64 addr[0];
};

+struct cyc_hist { /* one entry per symbol offset, filled by __symbol__account_cycles() */
+ u64 start; /* start of the tracked block, as an offset from the symbol start */
+ u64 cycles; /* cycles summed for the tracked block */
+ u64 cycles_aggr; /* cycles summed over every non-zero sample at this offset */
+ u32 num; /* number of samples behind 'cycles' */
+ u32 num_aggr; /* number of samples behind 'cycles_aggr' */
+ u8 have_start; /* non-zero when 'start' is valid */
+ /* 1 byte padding */
+ u16 reset; /* times the tracked block was discarded; saturates at 0xffff */
+};
+
struct source_line_percent {
double percent;
double percent_sum;
@@ -96,6 +107,7 @@ struct source_line {
* @histogram: Array of addr hit histograms per event being monitored
* @lines: If 'print_lines' is specified, per source code line percentages
* @source: source parsed from a disassembler like objdump -dS
+ * @cycles_hist: Per-offset cycle accounting for basic blocks, lazily allocated
*
* lines is allocated, percentages calculated and all sorted by percentage
* when the annotation is about to be presented, so the percentages are for
@@ -108,6 +120,7 @@ struct annotated_source {
struct source_line *lines;
int nr_histograms;
int sizeof_sym_hist;
+ struct cyc_hist *cycles_hist;
struct sym_hist histograms[0];
};

@@ -129,6 +142,10 @@ static inline struct annotation *symbol__annotation(struct symbol *sym)

int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx);

+int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
+ struct addr_map_symbol *start,
+ unsigned cycles);
+
int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr);

int symbol__alloc_hist(struct symbol *sym);
--
1.9.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/