Re: [PATCH v6 01/18] tools: Add gendwarfksyms

From: Masahiro Yamada
Date: Wed Dec 04 2024 - 22:28:56 EST


On Wed, Dec 4, 2024 at 11:14 PM Daniel Gomez <da.gomez@xxxxxxxxxxx> wrote:
>
> On 11/21/2024 9:42 PM, Sami Tolvanen wrote:
> > Add a basic DWARF parser, which uses libdw to traverse the debugging
> > information in an object file and looks for functions and variables.
> > In follow-up patches, this will be expanded to produce symbol versions
> > for CONFIG_MODVERSIONS from DWARF.
> >
> > Signed-off-by: Sami Tolvanen <samitolvanen@xxxxxxxxxx>
> > Reviewed-by: Petr Pavlu <petr.pavlu@xxxxxxxx>
> > ---
> > kernel/module/Kconfig | 8 ++
> > scripts/Makefile | 1 +
> > scripts/gendwarfksyms/.gitignore | 2 +
> > scripts/gendwarfksyms/Makefile | 8 ++
> > scripts/gendwarfksyms/dwarf.c | 166 ++++++++++++++++++++++++++
> > scripts/gendwarfksyms/gendwarfksyms.c | 126 +++++++++++++++++++
> > scripts/gendwarfksyms/gendwarfksyms.h | 100 ++++++++++++++++
> > scripts/gendwarfksyms/symbols.c | 96 +++++++++++++++
> > 8 files changed, 507 insertions(+)
> > create mode 100644 scripts/gendwarfksyms/.gitignore
> > create mode 100644 scripts/gendwarfksyms/Makefile
> > create mode 100644 scripts/gendwarfksyms/dwarf.c
> > create mode 100644 scripts/gendwarfksyms/gendwarfksyms.c
> > create mode 100644 scripts/gendwarfksyms/gendwarfksyms.h
> > create mode 100644 scripts/gendwarfksyms/symbols.c
> >
> > diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig
> > index 7c6588148d42..f9e5f82fa88b 100644
> > --- a/kernel/module/Kconfig
> > +++ b/kernel/module/Kconfig
> > @@ -169,6 +169,14 @@ config MODVERSIONS
> > make them incompatible with the kernel you are running. If
> > unsure, say N.
> >
> > +config GENDWARFKSYMS
> > + bool
> > + depends on DEBUG_INFO
> > + # Requires full debugging information, split DWARF not supported.
> > + depends on !DEBUG_INFO_REDUCED && !DEBUG_INFO_SPLIT
> > + # Requires ELF object files.
> > + depends on !LTO
> > +
> > config ASM_MODVERSIONS
> > bool
> > default HAVE_ASM_MODVERSIONS && MODVERSIONS
> > diff --git a/scripts/Makefile b/scripts/Makefile
> > index 6bcda4b9d054..d7fec46d38c0 100644
> > --- a/scripts/Makefile
> > +++ b/scripts/Makefile
> > @@ -54,6 +54,7 @@ targets += module.lds
> >
> > subdir-$(CONFIG_GCC_PLUGINS) += gcc-plugins
> > subdir-$(CONFIG_MODVERSIONS) += genksyms
> > +subdir-$(CONFIG_GENDWARFKSYMS) += gendwarfksyms
> > subdir-$(CONFIG_SECURITY_SELINUX) += selinux
> > subdir-$(CONFIG_SECURITY_IPE) += ipe
> >
> > diff --git a/scripts/gendwarfksyms/.gitignore b/scripts/gendwarfksyms/.gitignore
> > new file mode 100644
> > index 000000000000..0927f8d3cd96
> > --- /dev/null
> > +++ b/scripts/gendwarfksyms/.gitignore
> > @@ -0,0 +1,2 @@
> > +# SPDX-License-Identifier: GPL-2.0
> > +/gendwarfksyms
> > diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile
> > new file mode 100644
> > index 000000000000..9f8fec4fd39b
> > --- /dev/null
> > +++ b/scripts/gendwarfksyms/Makefile
> > @@ -0,0 +1,8 @@
> > +# SPDX-License-Identifier: GPL-2.0
> > +hostprogs-always-y += gendwarfksyms
> > +
> > +gendwarfksyms-objs += gendwarfksyms.o
> > +gendwarfksyms-objs += dwarf.o
> > +gendwarfksyms-objs += symbols.o
> > +
> > +HOSTLDLIBS_gendwarfksyms := -ldw -lelf
> > diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
> > new file mode 100644
> > index 000000000000..81df3e2ad3ae
> > --- /dev/null
> > +++ b/scripts/gendwarfksyms/dwarf.c
> > @@ -0,0 +1,166 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * Copyright (C) 2024 Google LLC
> > + */
> > +
> > +#include "gendwarfksyms.h"
> > +
> > +static bool get_ref_die_attr(Dwarf_Die *die, unsigned int id, Dwarf_Die *value)
> > +{
> > + Dwarf_Attribute da;
> > +
> > + /* dwarf_formref_die returns a pointer instead of an error value. */
> > + return dwarf_attr(die, id, &da) && dwarf_formref_die(&da, value);
> > +}
> > +
> > +#define DEFINE_GET_STRING_ATTR(attr) \
> > + static const char *get_##attr##_attr(Dwarf_Die *die) \
> > + { \
> > + Dwarf_Attribute da; \
> > + if (dwarf_attr(die, DW_AT_##attr, &da)) \
> > + return dwarf_formstring(&da); \
> > + return NULL; \
> > + }
> > +
> > +DEFINE_GET_STRING_ATTR(name)
> > +DEFINE_GET_STRING_ATTR(linkage_name)
> > +
> > +static const char *get_symbol_name(Dwarf_Die *die)
> > +{
> > + const char *name;
> > +
> > + /* rustc uses DW_AT_linkage_name for exported symbols */
> > + name = get_linkage_name_attr(die);
> > + if (!name)
> > + name = get_name_attr(die);
> > +
> > + return name;
> > +}
> > +
> > +static bool match_export_symbol(struct state *state, Dwarf_Die *die)
> > +{
> > + Dwarf_Die *source = die;
> > + Dwarf_Die origin;
> > +
> > + /* If the DIE has an abstract origin, use it for type information. */
> > + if (get_ref_die_attr(die, DW_AT_abstract_origin, &origin))
> > + source = &origin;
> > +
> > + state->sym = symbol_get(get_symbol_name(die));
> > +
> > + /* Look up using the origin name if there are no matches. */
> > + if (!state->sym && source != die)
> > + state->sym = symbol_get(get_symbol_name(source));
> > +
> > + state->die = *source;
> > + return !!state->sym;
> > +}
> > +
> > +/*
> > + * Type string processing
> > + */
> > +static void process(const char *s)
> > +{
> > + s = s ?: "<null>";
> > +
> > + if (dump_dies)
> > + fputs(s, stderr);
> > +}
> > +
> > +bool match_all(Dwarf_Die *die)
> > +{
> > + return true;
> > +}
> > +
> > +int process_die_container(struct state *state, Dwarf_Die *die,
> > + die_callback_t func, die_match_callback_t match)
> > +{
> > + Dwarf_Die current;
> > + int res;
> > +
> > + res = checkp(dwarf_child(die, &current));
> > + while (!res) {
> > + if (match(&current)) {
> > + /* <0 = error, 0 = continue, >0 = stop */
> > + res = checkp(func(state, &current));
> > + if (res)
> > + return res;
> > + }
> > +
> > + res = checkp(dwarf_siblingof(&current, &current));
> > + }
> > +
> > + return 0;
> > +}
> > +
> > +/*
> > + * Exported symbol processing
> > + */
> > +static void process_symbol(struct state *state, Dwarf_Die *die,
> > + die_callback_t process_func)
> > +{
> > + debug("%s", state->sym->name);
> > + check(process_func(state, die));
> > + if (dump_dies)
> > + fputs("\n", stderr);
> > +}
> > +
> > +static int __process_subprogram(struct state *state, Dwarf_Die *die)
> > +{
> > + process("subprogram");
> > + return 0;
> > +}
> > +
> > +static void process_subprogram(struct state *state, Dwarf_Die *die)
> > +{
> > + process_symbol(state, die, __process_subprogram);
> > +}
> > +
> > +static int __process_variable(struct state *state, Dwarf_Die *die)
> > +{
> > + process("variable ");
> > + return 0;
> > +}
> > +
> > +static void process_variable(struct state *state, Dwarf_Die *die)
> > +{
> > + process_symbol(state, die, __process_variable);
> > +}
> > +
> > +static int process_exported_symbols(struct state *unused, Dwarf_Die *die)
> > +{
> > + int tag = dwarf_tag(die);
> > +
> > + switch (tag) {
> > + /* Possible containers of exported symbols */
> > + case DW_TAG_namespace:
> > + case DW_TAG_class_type:
> > + case DW_TAG_structure_type:
> > + return check(process_die_container(
> > + NULL, die, process_exported_symbols, match_all));
> > +
> > + /* Possible exported symbols */
> > + case DW_TAG_subprogram:
> > + case DW_TAG_variable: {
> > + struct state state;
> > +
> > + if (!match_export_symbol(&state, die))
> > + return 0;
> > +
> > + if (tag == DW_TAG_subprogram)
> > + process_subprogram(&state, &state.die);
> > + else
> > + process_variable(&state, &state.die);
> > +
> > + return 0;
> > + }
> > + default:
> > + return 0;
> > + }
> > +}
> > +
> > +void process_cu(Dwarf_Die *cudie)
> > +{
> > + check(process_die_container(NULL, cudie, process_exported_symbols,
> > + match_all));
> > +}
> > diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c
> > new file mode 100644
> > index 000000000000..f84fa98fcbdb
> > --- /dev/null
> > +++ b/scripts/gendwarfksyms/gendwarfksyms.c
> > @@ -0,0 +1,126 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * Copyright (C) 2024 Google LLC
> > + */
> > +
> > +#include <fcntl.h>
> > +#include <getopt.h>
> > +#include <errno.h>
> > +#include <stdarg.h>
> > +#include <string.h>
> > +#include <unistd.h>
> > +#include "gendwarfksyms.h"
> > +
> > +/*
> > + * Options
> > + */
> > +
> > +/* Print debugging information to stderr */
> > +int debug;
> > +/* Dump DIE contents */
> > +int dump_dies;
> > +
> > +static void usage(void)
> > +{
> > + fputs("Usage: gendwarfksyms [options] elf-object-file ... < symbol-list\n\n"
> > + "Options:\n"
> > + " -d, --debug Print debugging information\n"
> > + " --dump-dies Dump DWARF DIE contents\n"
> > + " -h, --help Print this message\n"
> > + "\n",
> > + stderr);
> > +}
> > +
> > +static int process_module(Dwfl_Module *mod, void **userdata, const char *name,
> > + Dwarf_Addr base, void *arg)
> > +{
> > + Dwarf_Addr dwbias;
> > + Dwarf_Die cudie;
> > + Dwarf_CU *cu = NULL;
> > + Dwarf *dbg;
> > + int res;
> > +
> > + debug("%s", name);
> > + dbg = dwfl_module_getdwarf(mod, &dwbias);
> > +
> > + do {
> > + res = dwarf_get_units(dbg, cu, &cu, NULL, NULL, &cudie, NULL);
> > + if (res < 0)
> > + error("dwarf_get_units failed: no debugging information?");
> > + if (res == 1)
> > + break; /* No more units */
> > +
> > + process_cu(&cudie);
> > + } while (cu);
> > +
> > + return DWARF_CB_OK;
> > +}
> > +
> > +static const Dwfl_Callbacks callbacks = {
> > + .section_address = dwfl_offline_section_address,
> > + .find_debuginfo = dwfl_standard_find_debuginfo,
> > +};
> > +
> > +int main(int argc, char **argv)
> > +{
> > + unsigned int n;
> > + int opt;
> > +
> > + struct option opts[] = { { "debug", 0, NULL, 'd' },
> > + { "dump-dies", 0, &dump_dies, 1 },
> > + { "help", 0, NULL, 'h' },
> > + { 0, 0, NULL, 0 } };
> > +
> > + while ((opt = getopt_long(argc, argv, "dh", opts, NULL)) != EOF) {
> > + switch (opt) {
> > + case 0:
> > + break;
> > + case 'd':
> > + debug = 1;
> > + break;
> > + case 'h':
> > + usage();
> > + return 0;
> > + default:
> > + usage();
> > + return 1;
> > + }
> > + }
> > +
> > + if (optind >= argc) {
> > + usage();
> > + error("no input files?");
> > + }
> > +
> > + symbol_read_exports(stdin);
> > +
> > + for (n = optind; n < argc; n++) {
> > + Dwfl *dwfl;
> > + int fd;
> > +
> > + fd = open(argv[n], O_RDONLY);
> > + if (fd == -1)
> > + error("open failed for '%s': %s", argv[n],
> > + strerror(errno));
> > +
> > + dwfl = dwfl_begin(&callbacks);
> > + if (!dwfl)
> > + error("dwfl_begin failed for '%s': %s", argv[n],
> > + dwarf_errmsg(-1));
> > +
> > + if (!dwfl_report_offline(dwfl, argv[n], argv[n], fd))
> > + error("dwfl_report_offline failed for '%s': %s",
> > + argv[n], dwarf_errmsg(-1));
> > +
> > + dwfl_report_end(dwfl, NULL, NULL);
> > +
> > + if (dwfl_getmodules(dwfl, &process_module, NULL, 0))
> > + error("dwfl_getmodules failed for '%s'", argv[n]);
> > +
> > + dwfl_end(dwfl);
> > + }
> > +
> > + symbol_free();
> > +
> > + return 0;
> > +}
> > diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
> > new file mode 100644
> > index 000000000000..23e484af5d22
> > --- /dev/null
> > +++ b/scripts/gendwarfksyms/gendwarfksyms.h
> > @@ -0,0 +1,100 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +/*
> > + * Copyright (C) 2024 Google LLC
> > + */
> > +
> > +#define _GNU_SOURCE
>
>
> I'm getting these warnings:
>
> scripts/gendwarfksyms/kabi.c:245:6: warning: implicit declaration of
> function 'asprintf' is invalid in C99 [-Wimplicit-function-declaration]
> if (asprintf(&target, "%s %s", fqn, field) < 0)
> ^
> 1 warning generated.
> HOSTCC scripts/gendwarfksyms/symbols.o
> HOSTCC scripts/gendwarfksyms/types.o
> scripts/gendwarfksyms/types.c:260:6: warning: implicit declaration of
> function 'asprintf' is invalid in C99 [-Wimplicit-function-declaration]
> if (asprintf(&name, "%c#%s%s%s", prefix, quote, cache->fqn,
> quote) < 0)
> ^
> 1 warning generated.
>
>
> I think it may be cleaner to define _GNU_SOURCE in the CFLAGS instead.

I do not think so.

I believe the standard approach is to define the necessary feature test
macros and include the required headers in the files where they are used.


diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
index 86b3a3f2f558..127dceaf838d 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.h
+++ b/scripts/gendwarfksyms/gendwarfksyms.h
@@ -3,8 +3,6 @@
* Copyright (C) 2024 Google LLC
*/

-#define _GNU_SOURCE
-
#include <dwarf.h>
#include <elfutils/libdw.h>
#include <elfutils/libdwfl.h>
diff --git a/scripts/gendwarfksyms/kabi.c b/scripts/gendwarfksyms/kabi.c
index 2c6670ff1ac9..a3b5bb9e5487 100644
--- a/scripts/gendwarfksyms/kabi.c
+++ b/scripts/gendwarfksyms/kabi.c
@@ -3,7 +3,10 @@
* Copyright (C) 2024 Google LLC
*/

+#define _GNU_SOURCE
#include <errno.h>
+#include <stdio.h>
+
#include "gendwarfksyms.h"

#define KABI_RULE_SECTION ".discard.gendwarfksyms.kabi_rules"
diff --git a/scripts/gendwarfksyms/types.c b/scripts/gendwarfksyms/types.c
index f4dbd21b83e6..c37afdb90fe9 100644
--- a/scripts/gendwarfksyms/types.c
+++ b/scripts/gendwarfksyms/types.c
@@ -3,7 +3,10 @@
* Copyright (C) 2024 Google LLC
*/

+#define _GNU_SOURCE
+#include <stdio.h>
#include <zlib.h>
+
#include "gendwarfksyms.h"

static struct cache expansion_cache;






The current code adopts the following convention:
- All library header includes are collected in gendwarfksyms.h.
- All C files include "gendwarfksyms.h" and nothing else.


This smells like "please include <windows.h> from every file
when you program in Visual C++".


Personally I do not do that, but others may think differently.



--
Best Regards
Masahiro Yamada