[PATCH] gendwarfksyms: Add a separate pass to resolve FQNs
From: Sami Tolvanen
Date: Mon Feb 03 2025 - 16:26:50 EST
Using dwarf_getscopes_die to resolve fully-qualified names turns out to
be rather slow, and it also ends up processing the same scopes
repeatedly. Adding a separate pass that resolves names for all DIEs
before processing exports is noticeably faster.
For the object files with the most exports in a defconfig+Rust build,
the performance improvement is consistently >50%:
rust/bindings.o: 1038 exports
before: 9.5980 +- 0.0183 seconds time elapsed ( +- 0.19% )
after: 4.3116 +- 0.0287 seconds time elapsed ( +- 0.67% )
rust/core.o: 424 exports
before: 5.3584 +- 0.0204 seconds time elapsed ( +- 0.38% )
after: 0.05348 +- 0.00129 seconds time elapsed ( +- 2.42% )
^ Not a mistake: rust/core.o really does drop from ~5.36 s to ~0.05 s.
net/core/dev.o: 190 exports
before: 9.0507 +- 0.0297 seconds time elapsed ( +- 0.33% )
after: 3.2882 +- 0.0165 seconds time elapsed ( +- 0.50% )
rust/kernel.o: 129 exports
before: 6.8571 +- 0.0317 seconds time elapsed ( +- 0.46% )
after: 2.9096 +- 0.0316 seconds time elapsed ( +- 1.09% )
net/core/skbuff.o: 120 exports
before: 5.4805 +- 0.0291 seconds time elapsed ( +- 0.53% )
after: 2.0339 +- 0.0231 seconds time elapsed ( +- 1.14% )
drivers/gpu/drm/display/drm_dp_helper.o: 101 exports
before: 1.7877 +- 0.0187 seconds time elapsed ( +- 1.05% )
after: 0.69245 +- 0.00994 seconds time elapsed ( +- 1.44% )
net/core/sock.o: 97 exports
before: 5.8327 +- 0.0653 seconds time elapsed ( +- 1.12% )
after: 2.0784 +- 0.0291 seconds time elapsed ( +- 1.40% )
drivers/net/phy/phy_device.o: 95 exports
before: 3.0671 +- 0.0371 seconds time elapsed ( +- 1.21% )
after: 1.2127 +- 0.0207 seconds time elapsed ( +- 1.70% )
drivers/pci/pci.o: 93 exports
before: 1.1130 +- 0.0113 seconds time elapsed ( +- 1.01% )
after: 0.4848 +- 0.0127 seconds time elapsed ( +- 2.63% )
kernel/sched/core.o: 83 exports
before: 3.5092 +- 0.0223 seconds time elapsed ( +- 0.64% )
after: 1.1231 +- 0.0145 seconds time elapsed ( +- 1.29% )
Overall, a defconfig+DWARF5 build with gendwarfksyms and Rust is 14.8%
faster with this patch applied on my test system. Without Rust, there's
still a 10.4% improvement in build time when gendwarfksyms is used.
Note that symbol versions are unchanged with this patch.
Suggested-by: Giuliano Procida <gprocida@xxxxxxxxxx>
Signed-off-by: Sami Tolvanen <samitolvanen@xxxxxxxxxx>
---
scripts/gendwarfksyms/die.c | 2 +-
scripts/gendwarfksyms/dwarf.c | 152 ++++++++++++++------------
scripts/gendwarfksyms/gendwarfksyms.h | 2 +
scripts/gendwarfksyms/types.c | 2 +-
4 files changed, 86 insertions(+), 72 deletions(-)
diff --git a/scripts/gendwarfksyms/die.c b/scripts/gendwarfksyms/die.c
index 66bd4c9bc952..6183bbbe7b54 100644
--- a/scripts/gendwarfksyms/die.c
+++ b/scripts/gendwarfksyms/die.c
@@ -6,7 +6,7 @@
#include <string.h>
#include "gendwarfksyms.h"
-#define DIE_HASH_BITS 15
+#define DIE_HASH_BITS 16
/* {die->addr, state} -> struct die * */
static HASHTABLE_DEFINE(die_map, 1 << DIE_HASH_BITS);
diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
index 534d9aa7c114..eed247d8abfc 100644
--- a/scripts/gendwarfksyms/dwarf.c
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -3,6 +3,7 @@
* Copyright (C) 2024 Google LLC
*/
+#define _GNU_SOURCE
#include <assert.h>
#include <inttypes.h>
#include <stdarg.h>
@@ -193,79 +194,17 @@ static void process_fmt(struct die *cache, const char *fmt, ...)
va_end(args);
}
-#define MAX_FQN_SIZE 64
-
-/* Get a fully qualified name from DWARF scopes */
-static char *get_fqn(Dwarf_Die *die)
+static void update_fqn(struct die *cache, Dwarf_Die *die)
{
- const char *list[MAX_FQN_SIZE];
- Dwarf_Die *scopes = NULL;
- bool has_name = false;
- char *fqn = NULL;
- char *p;
- int count = 0;
- int len = 0;
- int res;
- int i;
-
- res = checkp(dwarf_getscopes_die(die, &scopes));
- if (!res) {
- list[count] = get_name_attr(die);
-
- if (!list[count])
- return NULL;
-
- len += strlen(list[count]);
- count++;
-
- goto done;
- }
-
- for (i = res - 1; i >= 0 && count < MAX_FQN_SIZE; i--) {
- if (dwarf_tag(&scopes[i]) == DW_TAG_compile_unit)
- continue;
-
- list[count] = get_name_attr(&scopes[i]);
-
- if (list[count]) {
- has_name = true;
- } else {
- list[count] = "<anonymous>";
- has_name = false;
- }
+ struct die *fqn;
- len += strlen(list[count]);
- count++;
-
- if (i > 0) {
- list[count++] = "::";
- len += 2;
- }
+ if (!cache->fqn) {
+ if (!__die_map_get((uintptr_t)die->addr, DIE_FQN, &fqn) &&
+ *fqn->fqn)
+ cache->fqn = xstrdup(fqn->fqn);
+ else
+ cache->fqn = "";
}
-
- free(scopes);
-
- if (count == MAX_FQN_SIZE)
- warn("increase MAX_FQN_SIZE: reached the maximum");
-
- /* Consider the DIE unnamed if the last scope doesn't have a name */
- if (!has_name)
- return NULL;
-done:
- fqn = xmalloc(len + 1);
- *fqn = '\0';
-
- p = fqn;
- for (i = 0; i < count; i++)
- p = stpcpy(p, list[i]);
-
- return fqn;
-}
-
-static void update_fqn(struct die *cache, Dwarf_Die *die)
-{
- if (!cache->fqn)
- cache->fqn = get_fqn(die) ?: "";
}
static void process_fqn(struct die *cache, Dwarf_Die *die)
@@ -1148,8 +1087,81 @@ static void process_symbol_ptr(struct symbol *sym, void *arg)
cache_free(&state.expansion_cache);
}
+static int resolve_fqns(struct state *parent, struct die *unused,
+ Dwarf_Die *die)
+{
+ struct state state;
+ struct die *cache;
+ const char *name;
+ bool use_prefix;
+ char *prefix = NULL;
+ char *fqn = "";
+ int tag;
+
+ if (!__die_map_get((uintptr_t)die->addr, DIE_FQN, &cache))
+ return 0;
+
+ tag = dwarf_tag(die);
+
+ /*
+ * Only namespaces and structures need to pass a prefix to the next
+ * scope.
+ */
+ use_prefix = tag == DW_TAG_namespace || tag == DW_TAG_class_type ||
+ tag == DW_TAG_structure_type;
+
+ state.expand.current_fqn = NULL;
+ name = get_name_attr(die);
+
+ if (parent && parent->expand.current_fqn && (use_prefix || name)) {
+ /*
+ * The fqn for the current DIE, and if needed, a prefix for the
+ * next scope.
+ */
+ if (asprintf(&prefix, "%s::%s", parent->expand.current_fqn,
+ name ? name : "<anonymous>") < 0)
+ error("asprintf failed");
+
+ if (use_prefix)
+ state.expand.current_fqn = prefix;
+
+ /*
+ * Use fqn only if the DIE has a name. Otherwise fqn will
+ * remain empty.
+ */
+ if (name) {
+ fqn = prefix;
+ /* prefix will be freed by die_map. */
+ prefix = NULL;
+ }
+ } else if (name) {
+ /* No prefix from the previous scope. Use only the name. */
+ fqn = xstrdup(name);
+
+ if (use_prefix)
+ state.expand.current_fqn = fqn;
+ }
+
+ /* If the DIE has a non-empty name, cache it. */
+ if (*fqn) {
+ cache = die_map_get(die, DIE_FQN);
+ /* Move ownership of fqn to die_map. */
+ cache->fqn = fqn;
+ cache->state = DIE_FQN;
+ }
+
+ check(process_die_container(&state, NULL, die, resolve_fqns,
+ match_all));
+
+ free(prefix);
+ return 0;
+}
+
void process_cu(Dwarf_Die *cudie)
{
+ check(process_die_container(NULL, NULL, cudie, resolve_fqns,
+ match_all));
+
check(process_die_container(NULL, NULL, cudie, process_exported_symbols,
match_all));
diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
index 197a1a8123c6..2feec168bf73 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.h
+++ b/scripts/gendwarfksyms/gendwarfksyms.h
@@ -139,6 +139,7 @@ void symbol_free(void);
enum die_state {
DIE_INCOMPLETE,
+ DIE_FQN,
DIE_UNEXPANDED,
DIE_COMPLETE,
DIE_SYMBOL,
@@ -170,6 +171,7 @@ static inline const char *die_state_name(enum die_state state)
{
switch (state) {
CASE_CONST_TO_STR(DIE_INCOMPLETE)
+ CASE_CONST_TO_STR(DIE_FQN)
CASE_CONST_TO_STR(DIE_UNEXPANDED)
CASE_CONST_TO_STR(DIE_COMPLETE)
CASE_CONST_TO_STR(DIE_SYMBOL)
diff --git a/scripts/gendwarfksyms/types.c b/scripts/gendwarfksyms/types.c
index 6c03265f4d10..6f37289104ff 100644
--- a/scripts/gendwarfksyms/types.c
+++ b/scripts/gendwarfksyms/types.c
@@ -248,7 +248,7 @@ static char *get_type_name(struct die *cache)
warn("found incomplete cache entry: %p", cache);
return NULL;
}
- if (cache->state == DIE_SYMBOL)
+ if (cache->state == DIE_SYMBOL || cache->state == DIE_FQN)
return NULL;
if (!cache->fqn || !*cache->fqn)
return NULL;
base-commit: 2014c95afecee3e76ca4a56956a936e23283f05b
--
2.48.1.362.g079036d154-goog