Re: [PATCH 1/3] perf disasm: Introduce symbol__disassemble_objdump()
From: Ian Rogers
Date: Mon Nov 11 2024 - 11:16:16 EST
On Mon, Nov 11, 2024 at 7:17 AM Arnaldo Carvalho de Melo
<acme@xxxxxxxxxx> wrote:
>
> From: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
>
> With the first disassemble method in perf, the parsing of objdump
> output, just like we have for llvm and capstone.
>
> This paves the way to allow the user to specify what disassemblers are
> preferred and to also to at some point allow building without the
> objdump method.
>
> Cc: Adrian Hunter <adrian.hunter@xxxxxxxxx>
> Cc: Athira Rajeev <atrajeev@xxxxxxxxxxxxxxxxxx>
> Cc: Ian Rogers <irogers@xxxxxxxxxx>
> Cc: Jiri Olsa <jolsa@xxxxxxxxxx>
> Cc: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>
> Cc: Namhyung Kim <namhyung@xxxxxxxxxx>
> Cc: Steinar H. Gunderson <sesse@xxxxxxxxxx>
> Signed-off-by: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
Acked-by: Ian Rogers <irogers@xxxxxxxxxx>
Nit below relating to a pre-existing condition in the code.
> ---
> tools/perf/util/disasm.c | 168 ++++++++++++++++++++-------------------
> 1 file changed, 88 insertions(+), 80 deletions(-)
>
> diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c
> index a525b80b934fdb5f..36cf61602c17fe69 100644
> --- a/tools/perf/util/disasm.c
> +++ b/tools/perf/util/disasm.c
> @@ -2045,17 +2045,14 @@ static char *expand_tabs(char *line, char **storage, size_t *storage_len)
> return new_line;
> }
>
> -int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
> +static int symbol__disassemble_objdump(const char *filename, struct symbol *sym,
> + struct annotate_args *args)
> {
> struct annotation_options *opts = &annotate_opts;
> struct map *map = args->ms.map;
> struct dso *dso = map__dso(map);
> char *command;
> FILE *file;
> - char symfs_filename[PATH_MAX];
> - struct kcore_extract kce;
> - bool delete_extract = false;
> - bool decomp = false;
> int lineno = 0;
> char *fileloc = NULL;
> int nline;
> @@ -2070,77 +2067,7 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
> NULL,
> };
> struct child_process objdump_process;
> - int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename));
> -
> - if (err)
> - return err;
> -
> - pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__,
> - symfs_filename, sym->name, map__unmap_ip(map, sym->start),
> - map__unmap_ip(map, sym->end));
> -
> - pr_debug("annotating [%p] %30s : [%p] %30s\n",
> - dso, dso__long_name(dso), sym, sym->name);
> -
> - if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_PROG_INFO) {
> - return symbol__disassemble_bpf(sym, args);
> - } else if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_IMAGE) {
> - return symbol__disassemble_bpf_image(sym, args);
> - } else if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) {
> - return -1;
> - } else if (dso__is_kcore(dso)) {
> - kce.kcore_filename = symfs_filename;
> - kce.addr = map__rip_2objdump(map, sym->start);
> - kce.offs = sym->start;
> - kce.len = sym->end - sym->start;
> - if (!kcore_extract__create(&kce)) {
> - delete_extract = true;
> - strlcpy(symfs_filename, kce.extract_filename,
> - sizeof(symfs_filename));
> - }
> - } else if (dso__needs_decompress(dso)) {
> - char tmp[KMOD_DECOMP_LEN];
> -
> - if (dso__decompress_kmodule_path(dso, symfs_filename,
> - tmp, sizeof(tmp)) < 0)
> - return -1;
> -
> - decomp = true;
> - strcpy(symfs_filename, tmp);
> - }
> -
> - /*
> - * For powerpc data type profiling, use the dso__data_read_offset
> - * to read raw instruction directly and interpret the binary code
> - * to understand instructions and register fields. For sort keys as
> - * type and typeoff, disassemble to mnemonic notation is
> - * not required in case of powerpc.
> - */
> - if (arch__is(args->arch, "powerpc")) {
> - extern const char *sort_order;
> -
> - if (sort_order && !strstr(sort_order, "sym")) {
> - err = symbol__disassemble_raw(symfs_filename, sym, args);
> - if (err == 0)
> - goto out_remove_tmp;
> -#ifdef HAVE_LIBCAPSTONE_SUPPORT
> - err = symbol__disassemble_capstone_powerpc(symfs_filename, sym, args);
> - if (err == 0)
> - goto out_remove_tmp;
> -#endif
> - }
> - }
> -
> -#ifdef HAVE_LIBLLVM_SUPPORT
> - err = symbol__disassemble_llvm(symfs_filename, sym, args);
> - if (err == 0)
> - goto out_remove_tmp;
> -#endif
> -#ifdef HAVE_LIBCAPSTONE_SUPPORT
> - err = symbol__disassemble_capstone(symfs_filename, sym, args);
> - if (err == 0)
> - goto out_remove_tmp;
> -#endif
> + int err;
>
> err = asprintf(&command,
> "%s %s%s --start-address=0x%016" PRIx64
> @@ -2163,13 +2090,13 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
>
> if (err < 0) {
> pr_err("Failure allocating memory for the command to run\n");
> - goto out_remove_tmp;
> + return err;
> }
>
> pr_debug("Executing: %s\n", command);
>
> objdump_argv[2] = command;
> - objdump_argv[4] = symfs_filename;
> + objdump_argv[4] = filename;
>
> /* Create a pipe to read from for stdout */
> memset(&objdump_process, 0, sizeof(objdump_process));
> @@ -2207,8 +2134,8 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
> break;
>
> /* Skip lines containing "filename:" */
> - match = strstr(line, symfs_filename);
> - if (match && match[strlen(symfs_filename)] == ':')
> + match = strstr(line, filename);
> + if (match && match[strlen(filename)] == ':')
> continue;
>
> expanded_line = strim(line);
> @@ -2253,6 +2180,87 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
>
> out_free_command:
> free(command);
> + return err;
> +}
> +
> +int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
> +{
> + struct map *map = args->ms.map;
> + struct dso *dso = map__dso(map);
> + char symfs_filename[PATH_MAX];
> + bool delete_extract = false;
> + struct kcore_extract kce;
> + bool decomp = false;
> + int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename));
> +
> + if (err)
> + return err;
> +
> + pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__,
> + symfs_filename, sym->name, map__unmap_ip(map, sym->start),
> + map__unmap_ip(map, sym->end));
> +
> + pr_debug("annotating [%p] %30s : [%p] %30s\n", dso, dso__long_name(dso), sym, sym->name);
> +
> + if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_PROG_INFO) {
> + return symbol__disassemble_bpf(sym, args);
> + } else if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_IMAGE) {
> + return symbol__disassemble_bpf_image(sym, args);
> + } else if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) {
> + return -1;
> + } else if (dso__is_kcore(dso)) {
> + kce.addr = map__rip_2objdump(map, sym->start);
> + kce.kcore_filename = symfs_filename;
> + kce.len = sym->end - sym->start;
> + kce.offs = sym->start;
> +
> + if (!kcore_extract__create(&kce)) {
> + delete_extract = true;
> + strlcpy(symfs_filename, kce.extract_filename, sizeof(symfs_filename));
> + }
> + } else if (dso__needs_decompress(dso)) {
> + char tmp[KMOD_DECOMP_LEN];
> +
> + if (dso__decompress_kmodule_path(dso, symfs_filename, tmp, sizeof(tmp)) < 0)
> + return -1;
> +
> + decomp = true;
> + strcpy(symfs_filename, tmp);
> + }
> +
> + /*
> + * For powerpc data type profiling, use the dso__data_read_offset to
> + * read raw instruction directly and interpret the binary code to
> + * understand instructions and register fields. For sort keys as type
> + * and typeoff, disassemble to mnemonic notation is not required in
> + * case of powerpc.
> + */
> + if (arch__is(args->arch, "powerpc")) {
> + extern const char *sort_order;
> +
> + if (sort_order && !strstr(sort_order, "sym")) {
> + err = symbol__disassemble_raw(symfs_filename, sym, args);
> + if (err == 0)
> + goto out_remove_tmp;
> +#ifdef HAVE_LIBCAPSTONE_SUPPORT
> + err = symbol__disassemble_capstone_powerpc(symfs_filename, sym, args);
> + if (err == 0)
> + goto out_remove_tmp;
> +#endif
> + }
> + }
> +
> +#ifdef HAVE_LIBLLVM_SUPPORT
> + err = symbol__disassemble_llvm(symfs_filename, sym, args);
> + if (err == 0)
> + goto out_remove_tmp;
> +#endif
> +#ifdef HAVE_LIBCAPSTONE_SUPPORT
> + err = symbol__disassemble_capstone(symfs_filename, sym, args);
> + if (err == 0)
> + goto out_remove_tmp;
> +#endif
> + err = symbol__disassemble_objdump(symfs_filename, sym, args);
This sure does read like the symbol will be disassembled 3 times if
those ifdefs are defined. Is there anyway to make the code look more
intuitive?
Thanks,
Ian
>
> out_remove_tmp:
> if (decomp)
> --
> 2.47.0
>