[PATCH v2 4/4] perf script: enable printing of branch stack

From: Stephane Eranian
Date: Mon Aug 31 2015 - 12:41:32 EST


This patch improves perf script by enabling printing of the
branch stack via the 'brstack' and 'brstacksym' arguments to
the field selection option -F. The option is off by default
and operates only if the perf.data file has branch stack content.

The branches are printed in from/to pairs. The most recent branch
is printed first. The number of branch entries varies based on the
underlying hardware and filtering used.

The brstack prints FROM/TO addresses in raw hexadecimal format.
The brstacksym prints FROM/TO addresses in symbolic form wherever
possible.

$ perf script -F ip,brstack
5d3000 0x401aa0/0x5d2000/M/-/-/0 ...

$ perf script -F ip,brstacksym
4011e0 noploop+0x0/noploop+0x0/P/-/-/0

The notation F/T/M/X/A/C describes the attributes of the branch.
F=from, T=to, M/P=misprediction/prediction, X=TSX, A=TSX abort, C=cycles (SKL)

Signed-off-by: Stephane Eranian <eranian@xxxxxxxxxx>
---
tools/perf/Documentation/perf-script.txt | 14 +++++-
tools/perf/builtin-script.c | 82 +++++++++++++++++++++++++++++++-
2 files changed, 93 insertions(+), 3 deletions(-)

diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index dc3ec78..22e7b4d 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -112,11 +112,11 @@ OPTIONS
--debug-mode::
Do various checks like samples ordering and lost events.

--f::
+-F::
--fields::
Comma separated list of fields to print. Options are:
comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
- srcline, period, iregs, flags.
+ srcline, period, iregs, brstack, brstacksym, flags.
Field list can be prepended with the type, trace, sw or hw,
to indicate to which event type the field list applies.
e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace
@@ -175,6 +175,16 @@ OPTIONS
Finally, a user may not set fields to none for all event types.
i.e., -f "" is not allowed.

+ The brstack output includes branch related information with raw addresses using the
+ FROM/TO/M/X/A/C syntax (the trailing C is the cycles value), in the following order:
+ FROM: branch source instruction
+ TO : branch target instruction
+ M/P/-: M=branch target mispredicted or branch direction was mispredicted, P=target predicted or direction predicted, -=not supported
+ X/- : X=branch inside a transactional region, -=not in transaction region or not supported
+ A/- : A=TSX abort entry, -=not aborted region or not supported
+
+ The brstacksym is identical to brstack, except that the FROM and TO addresses are printed in a symbolic form if possible.
+
-k::
--vmlinux=<file>::
vmlinux pathname
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index eb51325..93c86b9 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -48,6 +48,8 @@ enum perf_output_field {
PERF_OUTPUT_SRCLINE = 1U << 12,
PERF_OUTPUT_PERIOD = 1U << 13,
PERF_OUTPUT_IREGS = 1U << 14,
+ PERF_OUTPUT_BRSTACK = 1U << 15,
+ PERF_OUTPUT_BRSTACKSYM = 1U << 16,
};

struct output_option {
@@ -69,6 +71,8 @@ struct output_option {
{.str = "srcline", .field = PERF_OUTPUT_SRCLINE},
{.str = "period", .field = PERF_OUTPUT_PERIOD},
{.str = "iregs", .field = PERF_OUTPUT_IREGS},
+ {.str = "brstack", .field = PERF_OUTPUT_BRSTACK},
+ {.str = "brstacksym", .field = PERF_OUTPUT_BRSTACKSYM},
};

/* default set to maintain compatibility with current format */
@@ -419,6 +423,77 @@ static void print_sample_start(struct perf_sample *sample,
}
}

+/*
+ * Translate the prediction flags of one branch entry into a single
+ * character for display:
+ *   'P' - the predicted flag is set
+ *   'M' - the predicted flag is clear and the mispred flag is set
+ *   '-' - neither flag is set
+ */
+static inline char
+mispred_str(struct branch_entry *br)
+{
+	if (br->flags.predicted)
+		return 'P';
+
+	if (br->flags.mispred)
+		return 'M';
+
+	return '-';
+}
+
+/*
+ * Print the sample's branch stack using raw hexadecimal addresses.
+ *
+ * Entries are emitted in index order, each formatted as
+ *   " 0x<from>/0x<to>/<pred>/<in_tx>/<abort>/<cycles> "
+ * where <pred> comes from mispred_str(), <in_tx> is 'X' or '-',
+ * <abort> is 'A' or '-', and <cycles> is the entry's cycles flag value.
+ * Nothing is printed when the sample carries no branch stack or the
+ * stack holds zero entries.
+ *
+ * Only @sample is consulted; @event, @thread and @attr are unused.
+ */
+static void print_sample_brstack(union perf_event *event __maybe_unused,
+				 struct perf_sample *sample,
+				 struct thread *thread __maybe_unused,
+				 struct perf_event_attr *attr __maybe_unused)
+{
+	struct branch_stack *stack = sample->branch_stack;
+	struct branch_entry *entry;
+	u64 idx;
+
+	if (!stack || !stack->nr)
+		return;
+
+	for (idx = 0; idx < stack->nr; idx++) {
+		entry = stack->entries + idx;
+
+		printf(" 0x%"PRIx64"/0x%"PRIx64"/%c/%c/%c/%d ",
+		       entry->from,
+		       entry->to,
+		       mispred_str(entry),
+		       entry->flags.in_tx ? 'X' : '-',
+		       entry->flags.abort ? 'A' : '-',
+		       entry->flags.cycles);
+	}
+}
+
+/*
+ * Print the sample's branch stack with FROM and TO in symbolic form.
+ *
+ * For every entry, each address is resolved with thread__find_addr_map()
+ * and, when a map was found, map__find_symbol().  The two locations are
+ * then written with symbol__fprintf_symname_offs(), separated by '/',
+ * and followed by "/<pred>/<in_tx>/<abort>/<cycles> " where <pred> comes
+ * from mispred_str(), <in_tx> is 'X' or '-', <abort> is 'A' or '-' and
+ * <cycles> is the entry's cycles flag value.  What is shown for an
+ * address with no symbol is decided by symbol__fprintf_symname_offs().
+ *
+ * Nothing is printed when the sample carries no branch stack or the
+ * stack holds zero entries.
+ *
+ * @event:  its header.misc supplies the cpumode used for address lookup
+ * @sample: sample whose branch_stack is printed
+ * @thread: thread whose maps are searched for each address
+ * @attr:   unused
+ *
+ * NOTE(review): the one cpumode taken from the event header is used for
+ * both FROM and TO of every entry; whether that resolves branches whose
+ * endpoints lie in different modes should be confirmed.
+ */
+static void print_sample_brstacksym(union perf_event *event,
+				    struct perf_sample *sample,
+				    struct thread *thread,
+				    struct perf_event_attr *attr __maybe_unused)
+{
+	struct branch_stack *br = sample->branch_stack;
+	struct addr_location alf, alt;
+	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+	u64 i, from, to;
+
+	if (!(br && br->nr))
+		return;
+
+	for (i = 0; i < br->nr; i++) {
+		/*
+		 * Zero both locations so that .sym stays NULL unless the
+		 * symbol lookup below assigns it.
+		 */
+		memset(&alf, 0, sizeof(alf));
+		memset(&alt, 0, sizeof(alt));
+		from = br->entries[i].from;
+		to = br->entries[i].to;
+
+		thread__find_addr_map(thread, cpumode, MAP__FUNCTION, from, &alf);
+		if (alf.map)
+			alf.sym = map__find_symbol(alf.map, alf.addr, NULL);
+
+		thread__find_addr_map(thread, cpumode, MAP__FUNCTION, to, &alt);
+		if (alt.map)
+			alt.sym = map__find_symbol(alt.map, alt.addr, NULL);
+
+		symbol__fprintf_symname_offs(alf.sym, &alf, stdout);
+		putchar('/');
+		symbol__fprintf_symname_offs(alt.sym, &alt, stdout);
+		printf("/%c/%c/%c/%d ",
+		       mispred_str(br->entries + i),
+		       br->entries[i].flags.in_tx ? 'X' : '-',
+		       br->entries[i].flags.abort ? 'A' : '-',
+		       br->entries[i].flags.cycles);
+	}
+}
+
+
static void print_sample_addr(union perf_event *event,
struct perf_sample *sample,
struct thread *thread,
@@ -554,6 +629,11 @@ static void process_event(union perf_event *event, struct perf_sample *sample,
if (PRINT_FIELD(IREGS))
print_sample_iregs(event, sample, thread, attr);

+ if (PRINT_FIELD(BRSTACK))
+ print_sample_brstack(event, sample, thread, attr);
+ else if (PRINT_FIELD(BRSTACKSYM))
+ print_sample_brstacksym(event, sample, thread, attr);
+
printf("\n");
}

@@ -1672,7 +1752,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
"comma separated output fields prepend with 'type:'. "
"Valid types: hw,sw,trace,raw. "
"Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
- "addr,symoff,period,iregs,flags", parse_output_fields),
+ "addr,symoff,period,iregs,brstack,brstacksym,flags", parse_output_fields),
OPT_BOOLEAN('a', "all-cpus", &system_wide,
"system-wide collection from all CPUs"),
OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
--
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/