[PATCH v2 05/16] perf annotate: Introduce extract_op_location callback for arch-specific parsing
From: Tengda Wu
Date: Fri Apr 03 2026 - 06:01:07 EST
Assembly syntax for operands varies significantly across different
architectures, which prevents the operand location (op_loc) parsing
logic in annotate_get_insn_location() from being directly reused.
To simplify the core logic and improve maintainability, move the
operand parsing inside the for_each_insn_op_loc loop into arch-specific
extract_op_location callbacks. This refactoring is intended to be a
cleanup with no functional changes.
Signed-off-by: Li Huafei <lihuafei1@xxxxxxxxxx>
Signed-off-by: Tengda Wu <wutengda@xxxxxxxxxxxxxxx>
---
.../util/annotate-arch/annotate-powerpc.c | 10 ++
tools/perf/util/annotate-arch/annotate-x86.c | 82 ++++++++++++++++
tools/perf/util/annotate.c | 96 ++-----------------
tools/perf/util/annotate.h | 2 +
tools/perf/util/disasm.h | 4 +
5 files changed, 105 insertions(+), 89 deletions(-)
diff --git a/tools/perf/util/annotate-arch/annotate-powerpc.c b/tools/perf/util/annotate-arch/annotate-powerpc.c
index 218207b52581..8d0b8def5955 100644
--- a/tools/perf/util/annotate-arch/annotate-powerpc.c
+++ b/tools/perf/util/annotate-arch/annotate-powerpc.c
@@ -390,6 +390,15 @@ static void update_insn_state_powerpc(struct type_state *state,
}
#endif /* HAVE_LIBDW_SUPPORT */
+static int extract_op_location_powerpc(const struct arch *arch __maybe_unused,
+ struct disasm_line *dl,
+ const char *op_str __maybe_unused, int op_idx,
+ struct annotated_op_loc *op_loc)
+{
+ get_powerpc_regs(dl->raw.raw_insn, !op_idx, op_loc);
+ return 0;
+}
+
const struct arch *arch__new_powerpc(const struct e_machine_and_e_flags *id,
const char *cpuid __maybe_unused)
{
@@ -406,5 +415,6 @@ const struct arch *arch__new_powerpc(const struct e_machine_and_e_flags *id,
#ifdef HAVE_LIBDW_SUPPORT
arch->update_insn_state = update_insn_state_powerpc;
#endif
+ arch->extract_op_location = extract_op_location_powerpc;
return arch;
}
diff --git a/tools/perf/util/annotate-arch/annotate-x86.c b/tools/perf/util/annotate-arch/annotate-x86.c
index c77aabd48eba..c63ca3250b95 100644
--- a/tools/perf/util/annotate-arch/annotate-x86.c
+++ b/tools/perf/util/annotate-arch/annotate-x86.c
@@ -3,6 +3,7 @@
#include <linux/compiler.h>
#include <assert.h>
#include <inttypes.h>
+#include <ctype.h>
#include "../annotate-data.h"
#include "../debug.h"
#include "../disasm.h"
@@ -808,6 +809,86 @@ static void update_insn_state_x86(struct type_state *state,
}
#endif
+/*
+ * Get register number and access offset from the given instruction.
+ * It assumes AT&T x86 asm format like OFFSET(REG). Maybe it needs
+ * to revisit the format when it handles different architecture.
+ * Fills @reg and @offset when return 0.
+ */
+static int extract_reg_offset(const struct arch *arch, const char *str,
+ struct annotated_op_loc *op_loc)
+{
+ char *p;
+
+ if (arch->objdump.register_char == 0)
+ return -1;
+
+ /*
+ * It should start from offset, but it's possible to skip 0
+ * in the asm. So 0(%rax) should be same as (%rax).
+ *
+ * However, it also start with a segment select register like
+ * %gs:0x18(%rbx). In that case it should skip the part.
+ */
+ if (*str == arch->objdump.register_char) {
+ /* FIXME: Handle other segment registers */
+ if (!strncmp(str, "%gs:", 4))
+ op_loc->segment = INSN_SEG_X86_GS;
+
+ while (*str && !isdigit(*str) &&
+ *str != arch->objdump.memory_ref_char)
+ str++;
+ }
+
+ op_loc->offset = strtol(str, &p, 0);
+ op_loc->reg1 = arch__dwarf_regnum(arch, p);
+ if (op_loc->reg1 == -1)
+ return -1;
+
+ /* Get the second register */
+ if (op_loc->multi_regs)
+ op_loc->reg2 = arch__dwarf_regnum(arch, p + 1);
+
+ return 0;
+}
+
+static int extract_op_location_x86(const struct arch *arch,
+ struct disasm_line *dl __maybe_unused,
+ const char *op_str, int op_idx __maybe_unused,
+ struct annotated_op_loc *op_loc)
+{
+ const char *s = op_str;
+ char *p = NULL;
+
+ if (op_str == NULL)
+ return 0;
+
+ if (strchr(op_str, arch->objdump.memory_ref_char)) {
+ op_loc->mem_ref = true;
+ return extract_reg_offset(arch, op_str, op_loc);
+ }
+
+ /* FIXME: Handle other segment registers */
+ if (!strncmp(op_str, "%gs:", 4)) {
+ op_loc->segment = INSN_SEG_X86_GS;
+ op_loc->offset = strtol(op_str + 4,
+ &p, 0);
+ if (p && p != op_str + 4)
+ op_loc->imm = true;
+ return 0;
+ }
+
+ if (*s == arch->objdump.register_char) {
+ op_loc->reg1 = arch__dwarf_regnum(arch, s);
+ } else if (*s == arch->objdump.imm_char) {
+ op_loc->offset = strtol(s + 1, &p, 0);
+ if (p && p != s + 1)
+ op_loc->imm = true;
+ }
+
+ return 0;
+}
+
const struct arch *arch__new_x86(const struct e_machine_and_e_flags *id, const char *cpuid)
{
struct arch *arch = zalloc(sizeof(*arch));
@@ -847,5 +928,6 @@ const struct arch *arch__new_x86(const struct e_machine_and_e_flags *id, const c
#ifdef HAVE_LIBDW_SUPPORT
arch->update_insn_state = update_insn_state_x86;
#endif
+ arch->extract_op_location = extract_op_location_x86;
return arch;
}
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 63f0ee9d4c03..1bf69e00d76d 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -2450,7 +2450,7 @@ int annotate_check_args(void)
return 0;
}
-static int arch__dwarf_regnum(const struct arch *arch, const char *str)
+int arch__dwarf_regnum(const struct arch *arch, const char *str)
{
const char *p;
char *regname, *q;
@@ -2473,51 +2473,6 @@ static int arch__dwarf_regnum(const struct arch *arch, const char *str)
return reg;
}
-/*
- * Get register number and access offset from the given instruction.
- * It assumes AT&T x86 asm format like OFFSET(REG). Maybe it needs
- * to revisit the format when it handles different architecture.
- * Fills @reg and @offset when return 0.
- */
-static int extract_reg_offset(const struct arch *arch, const char *str,
- struct annotated_op_loc *op_loc)
-{
- char *p;
-
- if (arch->objdump.register_char == 0)
- return -1;
-
- /*
- * It should start from offset, but it's possible to skip 0
- * in the asm. So 0(%rax) should be same as (%rax).
- *
- * However, it also start with a segment select register like
- * %gs:0x18(%rbx). In that case it should skip the part.
- */
- if (*str == arch->objdump.register_char) {
- if (arch__is_x86(arch)) {
- /* FIXME: Handle other segment registers */
- if (!strncmp(str, "%gs:", 4))
- op_loc->segment = INSN_SEG_X86_GS;
- }
-
- while (*str && !isdigit(*str) &&
- *str != arch->objdump.memory_ref_char)
- str++;
- }
-
- op_loc->offset = strtol(str, &p, 0);
- op_loc->reg1 = arch__dwarf_regnum(arch, p);
- if (op_loc->reg1 == -1)
- return -1;
-
- /* Get the second register */
- if (op_loc->multi_regs)
- op_loc->reg2 = arch__dwarf_regnum(arch, p + 1);
-
- return 0;
-}
-
/**
* annotate_get_insn_location - Get location of instruction
* @arch: the architecture info
@@ -2548,6 +2503,7 @@ int annotate_get_insn_location(const struct arch *arch, struct disasm_line *dl,
struct ins_operands *ops;
struct annotated_op_loc *op_loc;
int i;
+ int ret;
if (ins__is_lock(&dl->ins))
ops = dl->ops.locked.ops;
@@ -2573,50 +2529,12 @@ int annotate_get_insn_location(const struct arch *arch, struct disasm_line *dl,
/* Invalidate the register by default */
op_loc->reg1 = -1;
op_loc->reg2 = -1;
+ op_loc->mem_ref = mem_ref;
+ op_loc->multi_regs = multi_regs;
- if (insn_str == NULL) {
- if (!arch__is_powerpc(arch))
- continue;
- }
-
- /*
- * For powerpc, call get_powerpc_regs function which extracts the
- * required fields for op_loc, ie reg1, reg2, offset from the
- * raw instruction.
- */
- if (arch__is_powerpc(arch)) {
- op_loc->mem_ref = mem_ref;
- op_loc->multi_regs = multi_regs;
- get_powerpc_regs(dl->raw.raw_insn, !i, op_loc);
- } else if (strchr(insn_str, arch->objdump.memory_ref_char)) {
- op_loc->mem_ref = true;
- op_loc->multi_regs = multi_regs;
- extract_reg_offset(arch, insn_str, op_loc);
- } else {
- const char *s = insn_str;
- char *p = NULL;
-
- if (arch__is_x86(arch)) {
- /* FIXME: Handle other segment registers */
- if (!strncmp(insn_str, "%gs:", 4)) {
- op_loc->segment = INSN_SEG_X86_GS;
- op_loc->offset = strtol(insn_str + 4,
- &p, 0);
- if (p && p != insn_str + 4)
- op_loc->imm = true;
- continue;
- }
- }
-
- if (*s == arch->objdump.register_char) {
- op_loc->reg1 = arch__dwarf_regnum(arch, s);
- }
- else if (*s == arch->objdump.imm_char) {
- op_loc->offset = strtol(s + 1, &p, 0);
- if (p && p != s + 1)
- op_loc->imm = true;
- }
- }
+ ret = arch->extract_op_location(arch, dl, insn_str, i, op_loc);
+ if (ret)
+ return ret;
}
return 0;
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 696e36dbf013..71195a27d38f 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -494,6 +494,8 @@ int annotate_parse_percent_type(const struct option *opt, const char *_str,
int annotate_check_args(void);
+int arch__dwarf_regnum(const struct arch *arch, const char *str);
+
/**
* struct annotated_op_loc - Location info of instruction operand
* @reg1: First register in the operand
diff --git a/tools/perf/util/disasm.h b/tools/perf/util/disasm.h
index d3730ed86dba..94ee67bcbce7 100644
--- a/tools/perf/util/disasm.h
+++ b/tools/perf/util/disasm.h
@@ -16,6 +16,7 @@ struct symbol;
struct data_loc_info;
struct type_state;
struct disasm_line;
+struct annotated_op_loc;
struct e_machine_and_e_flags {
uint32_t e_flags;
@@ -49,6 +50,9 @@ struct arch {
struct data_loc_info *dloc, Dwarf_Die *cu_die,
struct disasm_line *dl);
#endif
+ int (*extract_op_location)(const struct arch *arch, struct disasm_line *dl,
+ const char *op_str, int op_idx,
+ struct annotated_op_loc *op_loc);
};
struct ins {
--
2.34.1