Re: [PATCH 2/2] perf, tools, script: Add support for printing assembler

From: Arnaldo Carvalho de Melo
Date: Mon Feb 29 2016 - 09:56:37 EST


Em Fri, Feb 26, 2016 at 04:32:08PM -0800, Andi Kleen escreveu:
> From: Andi Kleen <ak@xxxxxxxxxxxxxxx>
>
> When dumping PT traces with perf script it is very useful to see the
> assembler for each sample, so that it is easily possible to follow
> the control flow.
>
> As using objdump from perf script is difficult and inefficient, this
> patch uses the udis86 library to implement assembler output.
> The library can be downloaded from http://udis86.sourceforge.net/
>
> The library is probed as an external dependency in the usual way. Then perf
> script calls into it when needed, and handles callbacks to resolve
> symbols.

What kernel have you used for testing this? 4.5-rc? I'm having trouble
with intel_pt with 4.5.0-rc4 :-\

And while this looks like a great feature to have, have you considered
using what is in tools/perf/util/intel-pt-decoder/ somehow, so that we
don't end up adding one more dependency on another external library?

It would be great if we could. If done that way, perhaps at some point
we could stop using objdump somehow :-\

- Arnaldo

> % perf record -e intel_pt//u true
> % perf script -F sym,symoff,ip,asm --itrace=i0ns | head
> 7fc7188b4190 _start+0x0 mov %rsp, %rdi
> 7fc7188b4193 _start+0x3 call _dl_start
> 7fc7188b7710 _dl_start+0x0 push %rbp
> 7fc7188b7711 _dl_start+0x1 mov %rsp, %rbp
> 7fc7188b7714 _dl_start+0x4 push %r15
> 7fc7188b7716 _dl_start+0x6 push %r14
> 7fc7188b7718 _dl_start+0x8 push %r13
> 7fc7188b771a _dl_start+0xa push %r12
> 7fc7188b771c _dl_start+0xc mov %rdi, %r12
> 7fc7188b771f _dl_start+0xf push %rbx
>
> Current issues:
> - Some jump references do not get resolved to symbols.
> - udis86 release does not support STAC/CLAC, which are used in the kernel,
> but there is a pending patch for it.
>
> Cc: adrian.hunter@xxxxxxxxx
> Signed-off-by: Andi Kleen <ak@xxxxxxxxxxxxxxx>
> ---
> tools/build/Makefile.feature | 6 +-
> tools/build/feature/Makefile | 8 ++-
> tools/build/feature/test-all.c | 5 ++
> tools/build/feature/test-udis86.c | 8 +++
> tools/perf/Documentation/perf-script.txt | 4 +-
> tools/perf/builtin-script.c | 108 +++++++++++++++++++++++++++++--
> tools/perf/config/Makefile | 5 ++
> 7 files changed, 134 insertions(+), 10 deletions(-)
> create mode 100644 tools/build/feature/test-udis86.c
>
> diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
> index 02db3cd..7de4fcb 100644
> --- a/tools/build/Makefile.feature
> +++ b/tools/build/Makefile.feature
> @@ -54,7 +54,8 @@ FEATURE_TESTS ?= \
> zlib \
> lzma \
> get_cpuid \
> - bpf
> + bpf \
> + udis86
>
> FEATURE_DISPLAY ?= \
> dwarf \
> @@ -73,7 +74,8 @@ FEATURE_DISPLAY ?= \
> zlib \
> lzma \
> get_cpuid \
> - bpf
> + bpf \
> + udis86
>
> # Set FEATURE_CHECK_(C|LD)FLAGS-all for all FEATURE_TESTS features.
> # If in the future we need per-feature checks/flags for features not
> diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
> index bf8f035..c89941e 100644
> --- a/tools/build/feature/Makefile
> +++ b/tools/build/feature/Makefile
> @@ -35,7 +35,8 @@ FILES= \
> test-zlib.bin \
> test-lzma.bin \
> test-bpf.bin \
> - test-get_cpuid.bin
> + test-get_cpuid.bin \
> + test-udis86.bin
>
> FILES := $(addprefix $(OUTPUT),$(FILES))
>
> @@ -50,7 +51,7 @@ __BUILD = $(CC) $(CFLAGS) -Wall -Werror -o $@ $(patsubst %.bin,%.c,$(@F)) $(LDFL
> ###############################
>
> $(OUTPUT)test-all.bin:
> - $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -laudit -I/usr/include/slang -lslang $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma
> + $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -laudit -I/usr/include/slang -lslang $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma -ludis86
>
> $(OUTPUT)test-hello.bin:
> $(BUILD)
> @@ -96,6 +97,9 @@ $(OUTPUT)test-numa_num_possible_cpus.bin:
> $(OUTPUT)test-libunwind.bin:
> $(BUILD) -lelf
>
> +$(OUTPUT)test-udis86.bin:
> + $(BUILD) -ludis86
> +
> $(OUTPUT)test-libunwind-debug-frame.bin:
> $(BUILD) -lelf
>
> diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c
> index 81025ca..d67b1d5 100644
> --- a/tools/build/feature/test-all.c
> +++ b/tools/build/feature/test-all.c
> @@ -129,6 +129,10 @@
> # include "test-bpf.c"
> #undef main
>
> +#define main main_test_udis86
> +# include "test-udis86.c"
> +#endif
> +
> int main(int argc, char *argv[])
> {
> main_test_libpython();
> @@ -158,6 +162,7 @@ int main(int argc, char *argv[])
> main_test_lzma();
> main_test_get_cpuid();
> main_test_bpf();
> + main_test_udis86();
>
> return 0;
> }
> diff --git a/tools/build/feature/test-udis86.c b/tools/build/feature/test-udis86.c
> new file mode 100644
> index 0000000..623c545
> --- /dev/null
> +++ b/tools/build/feature/test-udis86.c
> @@ -0,0 +1,8 @@
> +#include <udis86.h>
> +
> +int main(void)
> +{
> + ud_t ud;
> + ud_init(&ud);
> + return 0;
> +}
> diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
> index 382ddfb..c834f4d 100644
> --- a/tools/perf/Documentation/perf-script.txt
> +++ b/tools/perf/Documentation/perf-script.txt
> @@ -116,7 +116,7 @@ OPTIONS
> --fields::
> Comma separated list of fields to print. Options are:
> comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
> - srcline, period, iregs, brstack, brstacksym, flags.
> + srcline, period, iregs, brstack, brstacksym, flags, asm.
> Field list can be prepended with the type, trace, sw or hw,
> to indicate to which event type the field list applies.
> e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace
> @@ -185,6 +185,8 @@ OPTIONS
>
> The brstacksym is identical to brstack, except that the FROM and TO addresses are printed in a symbolic form if possible.
>
> + When asm is specified the assembler instruction of each sample is printed in disassembled form.
> +
> -k::
> --vmlinux=<file>::
> vmlinux pathname
> diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
> index c691214..9fd8cb3 100644
> --- a/tools/perf/builtin-script.c
> +++ b/tools/perf/builtin-script.c
> @@ -24,6 +24,10 @@
> #include <linux/bitmap.h>
> #include "asm/bug.h"
>
> +#ifdef HAVE_UDIS86
> +#include <udis86.h>
> +#endif
> +
> static char const *script_name;
> static char const *generate_script_lang;
> static bool debug_mode;
> @@ -58,6 +62,7 @@ enum perf_output_field {
> PERF_OUTPUT_IREGS = 1U << 14,
> PERF_OUTPUT_BRSTACK = 1U << 15,
> PERF_OUTPUT_BRSTACKSYM = 1U << 16,
> + PERF_OUTPUT_ASM = 1U << 17,
> };
>
> struct output_option {
> @@ -81,6 +86,7 @@ struct output_option {
> {.str = "iregs", .field = PERF_OUTPUT_IREGS},
> {.str = "brstack", .field = PERF_OUTPUT_BRSTACK},
> {.str = "brstacksym", .field = PERF_OUTPUT_BRSTACKSYM},
> + {.str = "asm", .field = PERF_OUTPUT_ASM},
> };
>
> /* default set to maintain compatibility with current format */
> @@ -264,7 +270,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
> "selected. Hence, no address to lookup the source line number.\n");
> return -EINVAL;
> }
> -
> + if (PRINT_FIELD(ASM) && !PRINT_FIELD(IP)) {
> + pr_err("Display of assembler requested but sample IP is not\n"
> + "selected.\n");
> + return -EINVAL;
> + }
> if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) &&
> perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID",
> PERF_OUTPUT_TID|PERF_OUTPUT_PID))
> @@ -405,6 +415,89 @@ static void print_sample_iregs(union perf_event *event __maybe_unused,
> }
> }
>
> +#ifdef HAVE_UDIS86
> +
> +struct perf_ud {
> + ud_t ud_obj;
> + struct thread *thread;
> + u8 cpumode;
> + int cpu;
> +};
> +
> +static const char *dis_resolve(struct ud *u, uint64_t addr, int64_t *off)
> +{
> + struct perf_ud *ud = container_of(u, struct perf_ud, ud_obj);
> + struct addr_location al;
> +
> + memset(&al, 0, sizeof(struct addr_location));
> +
> + thread__find_addr_map(ud->thread, ud->cpumode, MAP__FUNCTION, addr, &al);
> + if (!al.map)
> + thread__find_addr_map(ud->thread, ud->cpumode, MAP__VARIABLE,
> + addr, &al);
> + al.cpu = ud->cpu;
> + al.sym = NULL;
> +
> + if (al.map)
> + al.sym = map__find_symbol(al.map, al.addr, NULL);
> +
> + if (!al.sym)
> + return NULL;
> +
> + if (addr < al.sym->end)
> + *off = addr - al.sym->start;
> + else
> + *off = addr - al.map->start - al.sym->start;
> + return al.sym->name;
> +}
> +#endif
> +
> +static void print_sample_asm(union perf_event *event __maybe_unused,
> + struct perf_sample *sample __maybe_unused,
> + struct thread *thread __maybe_unused,
> + struct perf_event_attr *attr __maybe_unused,
> + struct addr_location *al __maybe_unused,
> + struct machine *machine __maybe_unused)
> +{
> +#ifdef HAVE_UDIS86
> + static bool ud_initialized = false;
> + static struct perf_ud ud;
> + u8 buffer[32];
> + int len;
> + u64 offset;
> +
> + if (!ud_initialized) {
> + ud_initialized = true;
> + ud_init(&ud.ud_obj);
> + ud_set_syntax(&ud.ud_obj, UD_SYN_ATT);
> + ud_set_sym_resolver(&ud.ud_obj, dis_resolve);
> + }
> + ud.thread = thread;
> + ud.cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
> + ud.cpu = sample->cpu;
> +
> + if (!al->map || !al->map->dso)
> + return;
> + if (al->map->dso->data.status == DSO_DATA_STATUS_ERROR)
> + return;
> +
> + /* Load maps to ensure dso->is_64_bit has been updated */
> + map__load(al->map, machine->symbol_filter);
> +
> + offset = al->map->map_ip(al->map, sample->ip);
> + len = dso__data_read_offset(al->map->dso, machine,
> + offset, buffer, 32);
> + if (len <= 0)
> + return;
> +
> + ud_set_mode(&ud.ud_obj, al->map->dso->is_64_bit ? 64 : 32);
> + ud_set_pc(&ud.ud_obj, sample->ip);
> + ud_set_input_buffer(&ud.ud_obj, buffer, len);
> + ud_disassemble(&ud.ud_obj);
> + printf("\t%s", ud_insn_asm(&ud.ud_obj));
> +#endif
> +}
> +
> static void print_sample_start(struct perf_sample *sample,
> struct thread *thread,
> struct perf_evsel *evsel)
> @@ -636,7 +729,8 @@ static int perf_evlist__max_name_len(struct perf_evlist *evlist)
>
> static void process_event(struct perf_script *script, union perf_event *event,
> struct perf_sample *sample, struct perf_evsel *evsel,
> - struct addr_location *al)
> + struct addr_location *al,
> + struct machine *machine)
> {
> struct thread *thread = al->thread;
> struct perf_event_attr *attr = &evsel->attr;
> @@ -664,7 +758,7 @@ static void process_event(struct perf_script *script, union perf_event *event,
>
> if (is_bts_event(attr)) {
> print_sample_bts(event, sample, evsel, thread, al);
> - return;
> + goto print_rest;
> }
>
> if (PRINT_FIELD(TRACE))
> @@ -687,11 +781,15 @@ static void process_event(struct perf_script *script, union perf_event *event,
> if (PRINT_FIELD(IREGS))
> print_sample_iregs(event, sample, thread, attr);
>
> +print_rest:
> if (PRINT_FIELD(BRSTACK))
> print_sample_brstack(event, sample, thread, attr);
> else if (PRINT_FIELD(BRSTACKSYM))
> print_sample_brstacksym(event, sample, thread, attr);
>
> + if (PRINT_FIELD(ASM))
> + print_sample_asm(event, sample, thread, attr, al, machine);
> +
> printf("\n");
> }
>
> @@ -798,7 +896,7 @@ static int process_sample_event(struct perf_tool *tool,
> if (scripting_ops)
> scripting_ops->process_event(event, sample, evsel, &al);
> else
> - process_event(scr, event, sample, evsel, &al);
> + process_event(scr, event, sample, evsel, &al, machine);
>
> out_put:
> addr_location__put(&al);
> @@ -1913,7 +2011,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
> "comma separated output fields prepend with 'type:'. "
> "Valid types: hw,sw,trace,raw. "
> "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
> - "addr,symoff,period,iregs,brstack,brstacksym,flags", parse_output_fields),
> + "addr,symoff,period,iregs,brstack,brstacksym,flags,asm", parse_output_fields),
> OPT_BOOLEAN('a', "all-cpus", &system_wide,
> "system-wide collection from all CPUs"),
> OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
> diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
> index 511141b..19bac7c 100644
> --- a/tools/perf/config/Makefile
> +++ b/tools/perf/config/Makefile
> @@ -576,6 +576,11 @@ ifneq ($(filter -lbfd,$(EXTLIBS)),)
> CFLAGS += -DHAVE_LIBBFD_SUPPORT
> endif
>
> +ifeq ($(feature-udis86), 1)
> + CFLAGS += -DHAVE_UDIS86
> + EXTLIBS += -ludis86
> +endif
> +
> ifndef NO_ZLIB
> ifeq ($(feature-zlib), 1)
> CFLAGS += -DHAVE_ZLIB_SUPPORT
> --
> 2.5.0