Re: [PATCH] perf srcline: Implement addr2line using libdw

From: Ian Rogers
Date: Mon Apr 01 2024 - 12:56:32 EST


On Mon, Apr 1, 2024 at 9:08 AM Martin Rodriguez Reboredo
<yakoyoku@xxxxxxxxx> wrote:
>
> `perf script` can be quite slow when inlines are enabled, by default it
> spawns an `addr2line` process and communicates with it via pipes, that
> comes with a certain overhead. The other option is to build perf with
> libbfd enabled so that its methods are called directly instead, but due
> to its licensing the resulting binary cannot be redistribuited.
>
> This commit adds the ability for perf to use libdw to query the source
> lines of binaries from the passed addresses. This avoids the process
> spawn overhead and because libdw is licensed dually under
> GPL-2.0-or-later and LGPL-3.0-or-later it can be distributed by
> downstream packagers. Another thing to consider is that if libdebuginfod
> was enabled for libdw then it's used to download debug info, it can be
> switched off by unsetting the `DEBUGINFOD_URLS` envvar.
>
> Signed-off-by: Martin Rodriguez Reboredo <yakoyoku@xxxxxxxxx>

Awesome sauce! Namhyung was just mentioning the idea to do this to me.
I wonder when this lands we can just work to remove all of the
BUILD_NONDISTRO options, namely libbfd, libiberty, etc. I suspect we
have dead/broken code hiding there.

> ---
> tools/perf/Makefile.config | 7 +-
> tools/perf/util/srcline.c | 277 ++++++++++++++++++++++++++++++++++++-
> 2 files changed, 281 insertions(+), 3 deletions(-)
>
> diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
> index 1fe8df97fe88..ab6d41e7a6b6 100644
> --- a/tools/perf/Makefile.config
> +++ b/tools/perf/Makefile.config
> @@ -947,13 +947,16 @@ ifdef BUILD_NONDISTRO
> $(call feature_check,disassembler-init-styled)
> endif
>
> - CFLAGS += -DHAVE_LIBBFD_SUPPORT
> - CXXFLAGS += -DHAVE_LIBBFD_SUPPORT
> + CFLAGS += -DHAVE_LIBBFD_SUPPORT -DHAVE_SYMBOLIZER_SUPPORT
> + CXXFLAGS += -DHAVE_LIBBFD_SUPPORT -DHAVE_SYMBOLIZER_SUPPORT

What does SYMBOLIZER mean in this context? Shouldn't the code be gated
on say a HAVE_LIBDW?

> ifeq ($(feature-libbfd-buildid), 1)
> CFLAGS += -DHAVE_LIBBFD_BUILDID_SUPPORT
> else
> $(warning Old version of libbfd/binutils things like PE executable profiling will not be available)
> endif
> +else ifndef NO_LIBDW_DWARF_UNWIND
> + CFLAGS += -DHAVE_SYMBOLIZER_SUPPORT
> + CXXFLAGS += -DHAVE_SYMBOLIZER_SUPPORT
> endif
>
> ifndef NO_DEMANGLE
> diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
> index 7addc34afcf5..8117424137cc 100644
> --- a/tools/perf/util/srcline.c
> +++ b/tools/perf/util/srcline.c
> @@ -130,6 +130,8 @@ static struct symbol *new_inline_sym(struct dso *dso,
>
> #define MAX_INLINE_NEST 1024
>
> +#ifdef HAVE_SYMBOLIZER_SUPPORT
> +
> #ifdef HAVE_LIBBFD_SUPPORT
>
> /*
> @@ -372,6 +374,279 @@ void dso__free_a2l(struct dso *dso)
>
> #else /* HAVE_LIBBFD_SUPPORT */
>
> +#include <elfutils/libdwfl.h>
> +#include <dwarf.h>
> +
> +static char *debuginfo_path = NULL;
> +
> +static const Dwfl_Callbacks offline_callbacks = {
> + /* We use this table for core files too. */
> + .find_elf = dwfl_build_id_find_elf,
> + .find_debuginfo = dwfl_standard_find_debuginfo,
> + .section_address = dwfl_offline_section_address,
> + .debuginfo_path = &debuginfo_path,
> +
> +};
> +
> +struct a2l_data {

Perhaps libdw_a2l_data to avoid confusion with data used for forked
addr2line. Could you comment the variables? Names like "input" are
fairly generic so you could provide an example of what their values
are. It is also useful to comment when something like a string is
owned by the struct, so that cleaning it up can be checked.

> + const char *input;
> + Dwarf_Addr addr;
> + Dwarf_Addr bias;
> +
> + bool found;
> + const char *filename;
> + const char *funcname;
> + int line;

Moving "found" and "line" later will avoid padding. As this data is
attached to a DSO, does there need to be some kind of locking protocol
for >1 symbolizing the same DSO? Perhaps these should be filled in as
out arguments to avoid this kind of complexity.

Also, there is some DSO clean up happening in:
https://lore.kernel.org/lkml/CAM9d7chqnsDBCVFoK2hSs=22QrXBS=13Px5hGA4hM=ho7CZd2g@xxxxxxxxxxxxxx/
where accessor functions are used for the sake of reference count checking:
https://perf.wiki.kernel.org/index.php/Reference_Count_Checking
which may cause some minor conflicts with this patch.

> +
> + Dwfl *dwfl;
> + Dwfl_Module *mod;
> + GElf_Sym **syms;
> +};
> +
> +static const char *get_diename(Dwarf_Die *die)
> +{
> + Dwarf_Attribute attr;
> + const char *name;
> +
> + name = dwarf_formstring(
> + dwarf_attr_integrate(die, DW_AT_MIPS_linkage_name, &attr) ?:
> + dwarf_attr_integrate(die, DW_AT_linkage_name, &attr));
> +
> + if (name == NULL)
> + name = dwarf_diename(die) ?: "??";
> +
> + return name;
> +}
> +
> +static void find_address_in_section(struct a2l_data *a2l)
> +{
> + Dwarf_Addr addr;
> + Dwfl_Line *line;
> +
> + if (a2l->found)
> + return;
> +
> + a2l->mod = dwfl_addrmodule(a2l->dwfl, a2l->addr);
> + if (!a2l->mod)
> + return;
> +
> + dwfl_module_getdwarf(a2l->mod, &a2l->bias);
> + addr = a2l->addr + a2l->bias;
> +
> + line = dwfl_module_getsrc(a2l->mod, addr);
> + if (!line)
> + line = dwfl_getsrc(a2l->dwfl, addr);
> + if (!line)
> + return;
> +
> + a2l->filename = dwfl_lineinfo(line, NULL, &a2l->line, NULL, NULL, NULL);
> + a2l->funcname = dwfl_module_addrname(a2l->mod, addr);
> +
> + if (a2l->filename && !strlen(a2l->filename))
> + a2l->filename = NULL;
> + else
> + a2l->found = true;
> +}
> +
> +static struct a2l_data *addr2line_init(const char *path)
> +{
> + Dwfl *dwfl;
> + struct a2l_data *a2l = NULL;
> +
> + dwfl = dwfl_begin(&offline_callbacks);
> + if (!dwfl)
> + goto out;
> +
> + if (dwfl_report_offline(dwfl, "", path, -1) == NULL)
> + return NULL;
> +
> + a2l = zalloc(sizeof(*a2l));
> + if (a2l == NULL)
> + goto out;
> +
> + a2l->dwfl = dwfl;
> + a2l->input = strdup(path);
> + if (a2l->input == NULL)
> + goto out;
> +
> + if (dwfl_report_end(dwfl, NULL, NULL))
> + goto out;
> +
> + return a2l;
> +
> +out:
> + if (a2l) {
> + zfree((char **)&a2l->input);
> + free(a2l);
> + }
> + dwfl_end(dwfl);
> + return NULL;
> +}
> +
> +static void addr2line_cleanup(struct a2l_data *a2l)
> +{
> + if (a2l->dwfl)
> + dwfl_end(a2l->dwfl);
> + zfree((char **)&a2l->input);
> + zfree(&a2l->syms);
> + free(a2l);
> +}
> +
> +static int inline_list__append_dso_a2l(struct dso *dso,
> + struct inline_node *node,
> + struct symbol *sym)
> +{
> + struct a2l_data *a2l = dso->a2l;
> + struct symbol *inline_sym = new_inline_sym(dso, sym, a2l->funcname);
> + char *srcline = NULL;
> +
> + if (a2l->filename)
> + srcline = srcline_from_fileline(a2l->filename, a2l->line);
> +
> + return inline_list__append(inline_sym, srcline, node);
> +}
> +
> +static int get_inline_function(struct dso *dso, struct inline_node *node,
> + struct symbol *sym)
> +{
> + struct a2l_data *a2l = dso->a2l;
> + Dwarf_Addr addr = a2l->addr + a2l->bias;
> + Dwarf_Addr bias = 0;
> + Dwarf_Die *cudie = dwfl_module_addrdie(a2l->mod, addr, &bias);
> +
> + Dwarf_Die *scopes = NULL;
> + int nscopes = dwarf_getscopes(cudie, addr - bias, &scopes);
> + if (nscopes < 0)
> + return 0;
> +
> + if (nscopes > 0) {

Try to prefer early return to avoid indenting the code. So I think the
above can just be:

if (nscopes <= 0)
return 0;

and then you don't need this "if (nscopes > 0) {"

> + Dwarf_Die subroutine;
> + Dwarf_Off dieoff = dwarf_dieoffset(&scopes[0]);
> + dwarf_offdie(dwfl_module_getdwarf(a2l->mod, &bias), dieoff,
> + &subroutine);
> + free(scopes);
> + scopes = NULL;

Is this dead code?

> +
> + nscopes = dwarf_getscopes_die(&subroutine, &scopes);
> + if (nscopes > 1) {

Similar early return comment to above to avoid indentation.

Thanks,
Ian

> + Dwarf_Die cu;
> + Dwarf_Files *files;
> + if (dwarf_diecu(&scopes[0], &cu, NULL, NULL) != NULL &&
> + dwarf_getsrcfiles(cudie, &files, NULL) == 0) {
> + for (int i = 0; i < nscopes - 1; i++) {
> + Dwarf_Word val;
> + Dwarf_Attribute attr;
> + Dwarf_Die *die = &scopes[i];
> + if (dwarf_tag(die) !=
> + DW_TAG_inlined_subroutine)
> + continue;
> +
> + for (int j = i + 1; j < nscopes; j++) {
> + Dwarf_Die *parent = &scopes[j];
> + int tag = dwarf_tag(parent);
> + if (tag == DW_TAG_inlined_subroutine ||
> + tag == DW_TAG_entry_point ||
> + tag == DW_TAG_subprogram) {
> + a2l->funcname =
> + get_diename(
> + parent);
> + break;
> + }
> + }
> +
> + a2l->filename = NULL;
> + a2l->line = 0;
> + if (dwarf_formudata(
> + dwarf_attr(die,
> + DW_AT_call_file,
> + &attr),
> + &val) == 0)
> + a2l->filename = dwarf_filesrc(
> + files, val, NULL, NULL);
> +
> + if (dwarf_formudata(
> + dwarf_attr(die,
> + DW_AT_call_line,
> + &attr),
> + &val) == 0)
> + a2l->line = val;
> +
> + if (a2l->filename != NULL)
> + if (inline_list__append_dso_a2l(
> + dso, node, sym))
> + return 0;
> + }
> + }
> + }
> + }
> + free(scopes);
> +
> + return 1;
> +}
> +
> +static int addr2line(const char *dso_name, u64 addr, char **file,
> + unsigned int *line, struct dso *dso, bool unwind_inlines,
> + struct inline_node *node, struct symbol *sym)
> +{
> + int ret = 0;
> + struct a2l_data *a2l = dso->a2l;
> +
> + if (!a2l) {
> + dso->a2l = addr2line_init(dso_name);
> + a2l = dso->a2l;
> + }
> +
> + if (a2l == NULL) {
> + if (!symbol_conf.disable_add2line_warn)
> + pr_warning("addr2line_init failed for %s\n", dso_name);
> + return 0;
> + }
> +
> + a2l->addr = addr;
> + a2l->found = false;
> +
> + find_address_in_section(a2l);
> +
> + if (!a2l->found)
> + return 0;
> +
> + if (unwind_inlines) {
> + if (node && inline_list__append_dso_a2l(dso, node, sym))
> + return 0;
> +
> + if (node && !get_inline_function(dso, node, sym))
> + return 0;
> +
> + ret = 1;
> + }
> +
> + if (file) {
> + *file = a2l->filename ? strdup(a2l->filename) : NULL;
> + ret = *file ? 1 : 0;
> + }
> +
> + if (line)
> + *line = (unsigned int)a2l->line;
> +
> + return ret;
> +}
> +
> +void dso__free_a2l(struct dso *dso)
> +{
> + struct a2l_data *a2l = dso->a2l;
> +
> + if (!a2l)
> + return;
> +
> + addr2line_cleanup(a2l);
> +
> + dso->a2l = NULL;
> +}
> +
> +#endif /* HAVE_LIBBFD_SUPPORT */
> +
> +#else /* HAVE_SYMBOLIZER_SUPPORT */
> +
> static int filename_split(char *filename, unsigned int *line_nr)
> {
> char *sep;
> @@ -788,7 +1063,7 @@ void dso__free_a2l(struct dso *dso)
> dso->a2l = NULL;
> }
>
> -#endif /* HAVE_LIBBFD_SUPPORT */
> +#endif /* HAVE_SYMBOLIZER_SUPPORT */
>
> static struct inline_node *addr2inlines(const char *dso_name, u64 addr,
> struct dso *dso, struct symbol *sym)
> --
> 2.44.0
>