[tip: objtool/core] objtool/klp: Rewrite symbol correlation algorithm

From: tip-bot2 for Josh Poimboeuf

Date: Tue May 05 2026 - 07:05:13 EST


The following commit has been merged into the objtool/core branch of tip:

Commit-ID: 6016dd33a10a89d7db9509124d678d38916ab337
Gitweb: https://git.kernel.org/tip/6016dd33a10a89d7db9509124d678d38916ab337
Author: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
AuthorDate: Thu, 09 Apr 2026 20:51:27 -07:00
Committer: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
CommitterDate: Mon, 04 May 2026 21:16:06 -07:00

objtool/klp: Rewrite symbol correlation algorithm

Rewrite the symbol correlation code, using a tiered list of
deterministic strategies in a loop. For duplicately named symbols, each
tier applies a filter with the goal of finding a 1:1 deterministic
correlation between the original and patched version of the symbol.

The three matching strategies are:

find_twin(): A funnel of progressively tighter filters. Candidates
with the same demangled name are counted at four levels: name, scope
(local-vs-global), file (strict file association), and checksum
(unchanged functions). The widest level that yields a 1:1 match wins,
narrower levels are only tried when the wider level is ambiguous.

find_twin_suffixed(): Uses already-correlated LLVM symbol pairs to map
.llvm.<hash> suffixes from orig to patched. Because all promoted
symbols from the same TU share the same hash, one correlated pair
seeds the mapping for the entire TU.

find_twin_positional(): Last resort, matches symbols by position among
same-named candidates, similar to livepatch sympos. Used for data
objects like __quirk variables where no deterministic filter can
distinguish the candidates.

Overall this works much better than the existing algorithm, particularly
with LTO kernels.

Acked-by: Song Liu <song@xxxxxxxxxx>
Signed-off-by: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
---
tools/objtool/klp-diff.c | 489 ++++++++++++++++++++++++++++----------
1 file changed, 369 insertions(+), 120 deletions(-)

diff --git a/tools/objtool/klp-diff.c b/tools/objtool/klp-diff.c
index 5f13d75..a9c9932 100644
--- a/tools/objtool/klp-diff.c
+++ b/tools/objtool/klp-diff.c
@@ -408,78 +408,358 @@ static bool dont_correlate(struct symbol *sym)
is_special_section_aux(sym->sec);
}

+static const char *llvm_suffix(const char *name)
+{
+ return strstr(name, ".llvm.");
+}
+
+static bool is_llvm_sym(struct symbol *sym)
+{
+ return llvm_suffix(sym->name);
+}
+
/*
- * When there is no full name match, try match demangled_name. This would
- * match original foo.llvm.123 to patched foo.llvm.456.
+ * Determine if two symbols have compatible source file origins:
*
- * Note that, in very rare cases, it is possible to have multiple
- * foo.llvm.<hash> in the same kernel. When this happens, report error and
- * fail the diff.
+ * - If both symbols are local, only return true if they belong to the same
+ * ELF file symbol.
+ *
+ * - If both symbols are global, always return true, as globals don't have
+ * file associations.
+ *
+ * - If they have different scopes, also return true, as the patch might have
+ * changed the symbol's scope.
+ *
+ * Works for both same-ELF (direct pointer compare) and cross-ELF
+ * (compare via file->twin) cases.
*/
-static int find_global_symbol_by_demangled_name(struct elf *elf, struct symbol *sym,
- struct symbol **out_sym)
+static bool maybe_same_file(struct symbol *sym1, struct symbol *sym2)
{
- struct symbol *sym2, *result = NULL;
- int count = 0;
+ if (!sym1->file || !sym2->file)
+ return true;
+ if (sym1->file == sym2->file)
+ return true;
+ return sym1->file->twin == sym2->file;
+}
+
+/*
+ * Similar to maybe_same_file(), but strict: no scope changes allowed.
+ *
+ * Works for both same-ELF (direct pointer compare) and cross-ELF
+ * (compare via file->twin) cases.
+ */
+static bool same_file(struct symbol *sym1, struct symbol *sym2)
+{
+ if (llvm_suffix(sym1->name) && llvm_suffix(sym2->name))
+ return true;
+ if (!sym1->file && !sym2->file)
+ return true;
+ if (!sym1->file || !sym2->file)
+ return false;
+ if (sym1->file == sym2->file)
+ return true;
+ return sym1->file->twin == sym2->file;
+}

- for_each_sym_by_demangled_name(elf, sym->demangled_name, sym2) {
- if (is_local_sym(sym2) || sym2->twin)
+/*
+ * Is it a local symbol, or at least was it local in the translation unit
+ * before LLVM promoted it?
+ */
+static bool is_tu_local_sym(struct symbol *sym)
+{
+ return is_local_sym(sym) || is_llvm_sym(sym);
+}
+
+/*
+ * Try to find sym1's twin in patched using deterministic matching.
+ *
+ * Multiple symbols can share a demangled name (e.g., static functions in
+ * different TUs). This function counts same-named candidates through a
+ * funnel of progressively tighter filters. Each level is a strict subset
+ * of the previous one.
+ *
+ * The widest level that yields a 1:1 match wins. Narrower levels are only
+ * needed when the wider level is ambiguous (count > 1).
+ *
+ * Candidates are pre-filtered by maybe_same_file(), which narrows most
+ * local symbols to their own TU. For example, 19 different static
+ * type_show() functions across vmlinux.o each see only one candidate after
+ * pre-filtering, so they match immediately at Level 1.
+ *
+ * Level 1 (name): Works when the demangled name is unique after
+ * pre-filtering. Handles most symbols: unique globals like copy_signal(),
+ * or per-TU locals like pcspkr_probe().
+ *
+ * Level 2 (scope): Filters by local-vs-global (TU-local-vs-not). Example:
+ * parse_header() exists as both a static and a global function. Level 1
+ * sees both (same demangled name), but Level 2 separates them by scope.
+ *
+ * Level 3 (file): Strict file matching via same_file(), which rejects scope
+ * changes. Example: LLVM-promoted foo.llvm.12345 (global, no FILE symbol)
+ * vs genuine local foo (has FILE symbol). Both are TU-local so Level 2
+ * can't distinguish them, but same_file() rejects the pair because one has
+ * a file association and the other doesn't.
+ *
+ * Level 4 (checksum): Distinguishes by function checksum. Example:
+ * usb_devnode.llvm.AAA and usb_devnode.llvm.BBB are two LLVM-promoted
+ * functions from different TUs with the same demangled name. After a TU
+ * change, the .llvm. hashes change but the functions themselves may be
+ * unchanged. Level 4 matches each to the patched candidate with the
+ * same checksum.
+ */
+static struct symbol *find_twin(struct elfs *e, struct symbol *sym1)
+{
+ struct symbol *name_last = NULL, *scope_last = NULL,
+ *file_last = NULL, *csum_last = NULL;
+ unsigned int name_orig = 0, name_patched = 0;
+ unsigned int scope_orig = 0, scope_patched = 0;
+ unsigned int file_orig = 0, file_patched = 0;
+ unsigned int csum_orig = 0, csum_patched = 0;
+ struct symbol *sym2, *match = NULL;
+
+ /* Count orig candidates */
+ for_each_sym_by_demangled_name(e->orig, sym1->demangled_name, sym2) {
+ if (sym2->twin || sym1->type != sym2->type || dont_correlate(sym2) ||
+ (!maybe_same_file(sym1, sym2)))
continue;

- count++;
- result = sym2;
+ /* Level 1: name match (widest filter) */
+ name_orig++;
+
+ /* Level 2: scope (scope changes allowed) */
+ if (is_tu_local_sym(sym1) != is_tu_local_sym(sym2))
+ continue;
+ scope_orig++;
+
+ /* Level 3: file (scope changes disallowed) */
+ if (!same_file(sym1, sym2))
+ continue;
+ file_orig++;
+
+ /* Level 4: checksum (unchanged symbols) */
+ if (sym1->len != sym2->len || !sym1->csum.checksum ||
+ sym1->csum.checksum != sym2->csum.checksum)
+ continue;
+ csum_orig++;
}

- if (count > 1) {
- ERROR("Multiple (%d) correlation candidates for %s", count, sym->name);
- return -1;
+ /* Count patched candidates */
+ for_each_sym_by_demangled_name(e->patched, sym1->demangled_name, sym2) {
+ if (sym2->twin || sym1->type != sym2->type || dont_correlate(sym2) ||
+ !maybe_same_file(sym1, sym2))
+ continue;
+
+ /* Level 1 */
+ name_patched++;
+ name_last = sym2;
+
+ /* Level 2 */
+ if (is_tu_local_sym(sym1) != is_tu_local_sym(sym2))
+ continue;
+ scope_patched++;
+ scope_last = sym2;
+
+ /* Level 3 */
+ if (!same_file(sym1, sym2))
+ continue;
+ file_patched++;
+ file_last = sym2;
+
+ /* Level 4 */
+ if (sym1->len != sym2->len || !sym1->csum.checksum ||
+ sym1->csum.checksum != sym2->csum.checksum)
+ continue;
+ csum_patched++;
+ csum_last = sym2;
}

- *out_sym = result;
- return 0;
+ /* Return the widest level that yields a unique (1:1) match */
+ if (name_orig == 1 && name_patched == 1)
+ match = name_last;
+ else if (scope_orig == 1 && scope_patched == 1)
+ match = scope_last;
+ else if (file_orig == 1 && file_patched == 1)
+ match = file_last;
+ else if (csum_orig == 1 && csum_patched == 1)
+ match = csum_last;
+
+ return match;
}

+struct llvm_suffix_pair {
+ struct hlist_node hash;
+ const char *orig;
+ const char *patched;
+};
+
+static DECLARE_HASHTABLE(suffix_map, 7);
+
/*
- * For each symbol in the original kernel, find its corresponding "twin" in the
- * patched kernel.
+ * Build a mapping of known orig-to-patched LLVM suffixes based on
+ * already-correlated symbol pairs. All promoted symbols from the same TU
+ * share the same .llvm.<hash> suffix, so one correlated pair seeds the map
+ * for the entire TU.
*/
-static int correlate_symbols(struct elfs *e)
+static int update_suffix_map(struct elf *elf)
{
- struct symbol *file1_sym, *file2_sym;
- struct symbol *sym1, *sym2;
+ struct llvm_suffix_pair *entry;
+ struct symbol *sym;

- file1_sym = first_file_symbol(e->orig);
- file2_sym = first_file_symbol(e->patched);
+ for_each_sym(elf, sym) {
+ const char *s1, *s2;
+ bool found;

- /*
- * Correlate any locals before the first FILE symbol. This has been
- * seen when LTO inexplicably strips the initramfs_data.o FILE symbol
- * due to the file only containing data and no code.
- */
- for_each_sym(e->orig, sym1) {
- if (sym1 == file1_sym || !is_local_sym(sym1))
- break;
+ if (!sym->twin)
+ continue;
+
+ s1 = llvm_suffix(sym->name);
+ s2 = llvm_suffix(sym->twin->name);

- if (dont_correlate(sym1))
+ if (!s1 || !s2)
continue;

- for_each_sym(e->patched, sym2) {
- if (sym2 == file2_sym || !is_local_sym(sym2))
+ found = false;
+ hash_for_each_possible(suffix_map, entry, hash, str_hash(s1)) {
+ if (!strcmp(entry->orig, s1)) {
+ found = true;
break;
+ }
+ }
+ if (found)
+ continue;

- if (sym2->twin || dont_correlate(sym2))
- continue;
+ entry = calloc(1, sizeof(*entry));
+ if (!entry) {
+ ERROR_GLIBC("calloc");
+ return -1;
+ }

- if (strcmp(sym1->demangled_name, sym2->demangled_name))
- continue;
+ entry->orig = s1;
+ entry->patched = s2;
+ hash_add(suffix_map, &entry->hash, str_hash(s1));
+ }

- sym1->twin = sym2;
- sym2->twin = sym1;
+ return 0;
+}
+
+/*
+ * Match by translating the symbol's .llvm.<hash> suffix through the suffix
+ * map to find the corresponding hash suffix for the patched object.
+ *
+ * Example: In the original kernel, TU drivers/base/core.c contains
+ * foo.llvm.12345 and bar.llvm.12345 (same TU, same hash). After patching,
+ * they become foo.llvm.67890 and bar.llvm.67890. If foo was already
+ * correlated by find_twin() (e.g., unique by name), the suffix map records
+ * .llvm.12345 -> .llvm.67890. When processing bar.llvm.12345, this
+ * function looks up .llvm.12345, gets .llvm.67890, constructs the name
+ * bar.llvm.67890, and finds the match.
+ */
+static struct symbol *find_twin_suffixed(struct elf *elf, struct symbol *sym1)
+{
+ const char *suffix, *patched_suffix = NULL;
+ struct symbol *sym2, *match = NULL;
+ char name[SYM_NAME_LEN];
+ struct llvm_suffix_pair *entry;
+ int count = 0;
+
+ suffix = llvm_suffix(sym1->name);
+ if (!suffix)
+ return NULL;
+
+ hash_for_each_possible(suffix_map, entry, hash, str_hash(suffix)) {
+ if (!strcmp(entry->orig, suffix)) {
+ patched_suffix = entry->patched;
break;
}
}
+ if (!patched_suffix)
+ return NULL;
+
+ if (snprintf_check(name, SYM_NAME_LEN, "%s%s",
+ sym1->demangled_name, patched_suffix))
+ return NULL;
+
+ for_each_sym_by_name(elf, name, sym2) {
+ if (sym2->twin || sym1->type != sym2->type || dont_correlate(sym2))
+ continue;
+ count++;
+ match = sym2;
+ }
+
+ if (count == 1)
+ return match;
+
+ return NULL;
+}
+
+/*
+ * Last-resort positional matching.
+ *
+ * Finds a symbol with the same position in the symbol table among
+ * same-demangled-name candidates, similar to livepatch sympos. Note that
+ * LLVM-promoted symbols are globals, which come after locals in the symbol
+ * table, so we have to be careful not to compare different scopes.
+ *
+ * Example: arch/x86/events/intel/core.c defines many __quirk variables via
+ * X86_MATCH_*() macros. In the symbol table they appear as __quirk.90,
+ * __quirk.97, __quirk.101, etc., all with demangled name __quirk, same
+ * scope, and same FILE symbol. No deterministic filter can distinguish
+ * them, so they're matched by position: the 1st __quirk in orig matches the
+ * 1st in patched, the 2nd matches the 2nd, etc.
+ *
+ * This is less deterministic than the other strategies, so it's done last.
+ */
+static struct symbol *find_twin_positional(struct elfs *e, struct symbol *sym1)
+{
+ unsigned int idx_orig = 0, idx_patched = 0;
+ unsigned int sym1_pos = 0;
+ struct symbol *sym2, *match = NULL;
+
+ for_each_sym_by_demangled_name(e->orig, sym1->demangled_name, sym2) {
+ if (sym2->twin || sym1->type != sym2->type || dont_correlate(sym2) ||
+ !maybe_same_file(sym1, sym2))
+ continue;
+ if (is_tu_local_sym(sym1) != is_tu_local_sym(sym2) ||
+ is_llvm_sym(sym1) != is_llvm_sym(sym2))
+ continue;
+ if (sym1 == sym2)
+ sym1_pos = idx_orig;
+ idx_orig++;
+ }
+
+ for_each_sym_by_demangled_name(e->patched, sym1->demangled_name, sym2) {
+ if (sym2->twin || sym1->type != sym2->type || dont_correlate(sym2) ||
+ !maybe_same_file(sym1, sym2))
+ continue;
+ if (is_tu_local_sym(sym1) != is_tu_local_sym(sym2) ||
+ is_llvm_sym(sym1) != is_llvm_sym(sym2))
+ continue;
+ if (idx_patched == sym1_pos)
+ match = sym2;
+ idx_patched++;
+ }
+
+ if (idx_orig != idx_patched)
+ return NULL;
+
+ return match;
+}
+
+/*
+ * Correlate symbols between the orig and patched objects. This is a
+ * prerequisite for detecting changed functions, as well as for properly
+ * translating relocations so they point to the correct symbol.
+ */
+static int correlate_symbols(struct elfs *e)
+{
+ struct symbol *file1_sym, *file2_sym;
+ struct symbol *sym1, *sym2;
+ bool progress;
+
+ /* Correlate FILE symbols */
+ file1_sym = first_file_symbol(e->orig);
+ file2_sym = first_file_symbol(e->patched);

- /* Correlate locals after the first FILE symbol */
for (; ; file1_sym = next_file_symbol(e->orig, file1_sym),
file2_sym = next_file_symbol(e->patched, file2_sym)) {

@@ -503,92 +783,52 @@ static int correlate_symbols(struct elfs *e)

file1_sym->twin = file2_sym;
file2_sym->twin = file1_sym;
-
- sym1 = file1_sym;
-
- for_each_sym_continue(e->orig, sym1) {
- if (is_file_sym(sym1) || !is_local_sym(sym1))
- break;
-
- if (dont_correlate(sym1))
- continue;
-
- sym2 = file2_sym;
- for_each_sym_continue(e->patched, sym2) {
- if (is_file_sym(sym2) || !is_local_sym(sym2))
- break;
-
- if (sym2->twin || dont_correlate(sym2))
- continue;
-
- if (strcmp(sym1->demangled_name, sym2->demangled_name))
- continue;
-
- sym1->twin = sym2;
- sym2->twin = sym1;
- break;
- }
- }
}

- /* Correlate globals */
- for_each_sym(e->orig, sym1) {
- if (sym1->bind == STB_LOCAL)
- continue;
-
- sym2 = find_global_symbol_by_name(e->patched, sym1->name);
- if (sym2 && !sym2->twin) {
- sym1->twin = sym2;
- sym2->twin = sym1;
- }
- }

/*
- * Correlate globals with demangled_name.
- * A separate loop is needed because we want to finish all the
- * full name correlations first.
+ * Correlate in two phases: loop deterministic levels until no more
+ * progress, then use positional fallback for the rest. This prevents
+ * the nondeterministic positional matching from stealing symbols that
+ * have deterministic matches.
*/
- for_each_sym(e->orig, sym1) {
- if (sym1->bind == STB_LOCAL || sym1->twin)
- continue;
-
- if (find_global_symbol_by_demangled_name(e->patched, sym1, &sym2))
- return -1;
-
- if (sym2 && !sym2->twin) {
+ hash_init(suffix_map);
+ do {
+ progress = false;
+ for_each_sym(e->orig, sym1) {
+ if (sym1->twin || dont_correlate(sym1))
+ continue;
+ sym2 = find_twin(e, sym1);
+ if (!sym2)
+ continue;
sym1->twin = sym2;
sym2->twin = sym1;
+ progress = true;
}
- }
-
- /* Correlate original locals with patched globals */
- for_each_sym(e->orig, sym1) {
- if (sym1->twin || dont_correlate(sym1) || !is_local_sym(sym1))
- continue;

- sym2 = find_global_symbol_by_name(e->patched, sym1->name);
- if (!sym2 && find_global_symbol_by_demangled_name(e->patched, sym1, &sym2))
+ if (update_suffix_map(e->orig))
return -1;

- if (sym2 && !sym2->twin) {
+ for_each_sym(e->orig, sym1) {
+ if (sym1->twin || dont_correlate(sym1))
+ continue;
+ sym2 = find_twin_suffixed(e->patched, sym1);
+ if (!sym2)
+ continue;
sym1->twin = sym2;
sym2->twin = sym1;
+ progress = true;
}
- }
+ } while (progress);

- /* Correlate original globals with patched locals */
- for_each_sym(e->patched, sym2) {
- if (sym2->twin || dont_correlate(sym2) || !is_local_sym(sym2))
+ for_each_sym(e->orig, sym1) {
+ if (sym1->twin || dont_correlate(sym1))
continue;
-
- sym1 = find_global_symbol_by_name(e->orig, sym2->name);
- if (!sym1 && find_global_symbol_by_demangled_name(e->orig, sym2, &sym1))
- return -1;
-
- if (sym1 && !sym1->twin) {
- sym2->twin = sym1;
- sym1->twin = sym2;
- }
+ sym2 = find_twin_positional(e, sym1);
+ if (!sym2)
+ continue;
+ sym1->twin = sym2;
+ sym2->twin = sym1;
}

for_each_sym(e->orig, sym1) {
@@ -800,19 +1040,24 @@ static void mark_included_function(struct symbol *func)
*/
static int mark_changed_functions(struct elfs *e)
{
- struct symbol *sym_orig, *patched_sym;
+ struct symbol *orig_sym, *patched_sym;
bool changed = false;

/* Find changed functions */
- for_each_sym(e->orig, sym_orig) {
- if (!is_func_sym(sym_orig) || dont_correlate(sym_orig))
+ for_each_sym(e->orig, orig_sym) {
+ if (dont_correlate(orig_sym))
continue;

- patched_sym = sym_orig->twin;
+ patched_sym = orig_sym->twin;
if (!patched_sym)
continue;

- if (sym_orig->csum.checksum != patched_sym->csum.checksum) {
+ if (orig_sym->csum.checksum != patched_sym->csum.checksum) {
+ if (!is_func_sym(orig_sym)) {
+ ERROR("changed data: %s", orig_sym->name);
+ return -1;
+ }
+
patched_sym->changed = 1;
mark_included_function(patched_sym);
changed = true;
@@ -837,7 +1082,7 @@ static int mark_changed_functions(struct elfs *e)
printf("%s: changed function: %s\n", objname, patched_sym->name);
}

- return !changed ? -1 : 0;
+ return !changed ? 1 : 0;
}

static int clone_included_functions(struct elfs *e)
@@ -1870,6 +2115,7 @@ static int copy_import_ns(struct elfs *e)
int cmd_klp_diff(int argc, const char **argv)
{
struct elfs e = {0};
+ int ret;

argc = parse_options(argc, argv, klp_diff_options, klp_diff_usage, 0);
if (argc != 3)
@@ -1896,7 +2142,10 @@ int cmd_klp_diff(int argc, const char **argv)
if (correlate_symbols(&e))
return -1;

- if (mark_changed_functions(&e))
+ ret = mark_changed_functions(&e);
+ if (ret < 0)
+ return -1;
+ if (ret > 0)
return 0;

e.out = elf_create_file(&e.orig->ehdr, argv[2]);