[PATCH v2 15/16] perf annotate-arm64: Support per-cpu variable access tracking
From: Tengda Wu
Date: Fri Apr 03 2026 - 06:01:00 EST
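Extend update_insn_state_arm64() to handle per-cpu variable addressing,
and teach check_matching_type() to look up the per-cpu variable through
the second (global address) register on arm64.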
On arm64, per-cpu variables are accessed by adding a per-cpu offset
(typically from the '__per_cpu_offset' array) to the global variable's
address. This logic often results in the following instruction pattern:
adrp x4, <page>
add x4, x4, #offset // x4 = &__per_cpu_offset
ldr x6, [x4, w0, sxtw #3] // x6 = __per_cpu_offset[cpu]
...
adrp x5, <page>
add x5, x5, #offset // x5 = &global_var
ldr x0, [x6, x5] // Pattern A: load the per-cpu instance directly
OR
add x0, x6, x5 // Pattern B: compute per-cpu addr
To handle such cases:
1. Identify per-cpu base initialization: Detect an 'ldr' whose base
address was set up by an 'adrp/add' pair and resolves to the
'__per_cpu_offset' symbol, and mark the destination register of that
load as TSR_KIND_PERCPU_BASE.
2. Propagate type information: During subsequent 'ldr' or 'add' steps,
if one operand is a PERCPU_BASE and the other is a global variable,
inherit the type from the global variable to correctly identify the
per-cpu instance.
A real-world example is shown below:
ffff8000808f2d28 <cppc_set_perf>:
ffff8000808f2d38: adrp x2, ffff800082033000 <event_array+0x58>
ffff8000808f2d3c: add x5, x2, #0x3f8 // x5 = &__per_cpu_offset
ffff8000808f2d44: adrp x2, ffff800081f73000 <vmcore_cb_srcu_srcu_data+0x80>
ffff8000808f2d48: add x2, x2, #0x6b8 // x2 = &cpu_pcc_subspace_idx
ffff8000808f2d6c: ldr x5, [x5, w0, sxtw #3] // x5 = __per_cpu_offset[cpu]
ffff8000808f2d80: ldr w23, [x5, x2] // PMU sample, per_cpu(cpu_pcc_subspace_idx, cpu)
Before this commit, the tracker could not link x5 back to a per-cpu
context, so the sampled load was resolved to the element type of
'__per_cpu_offset' instead of the type of the per-cpu variable:
adrp [10] global addr=ffff800082033000 -> reg2
add [14] global addr=ffff8000820333f8 -> reg5
adrp [1c] global addr=ffff800081f73000 -> reg2
add [20] global addr=ffff800081f736b8 -> reg2
ldr [44] global (ffff8000820333f8) -> reg5 type='long unsigned int[]' size=0x1000
chk [58] reg5 offset=0 ok=1 kind=1 (long unsigned int[]) : Good!
found by insn track: 0(reg5, reg2) type-offset=0
final result: type='long unsigned int' size=0x8
After this commit, the tracker correctly identifies the per-cpu flow and
resolves the actual variable type:
ldr [44] global (ffff8000820333f8) -> reg5 percpu base
chk [58] reg5 offset=0 ok=1 kind=2 percpu var : Good!
found by insn track: 0(reg5, reg2) type-offset=0
final result: type='int' size=0x4
Signed-off-by: Tengda Wu <wutengda@xxxxxxxxxxxxxxx>
---
.../perf/util/annotate-arch/annotate-arm64.c | 69 ++++++++++++++++++-
tools/perf/util/annotate-data.c | 33 ++++++---
2 files changed, 92 insertions(+), 10 deletions(-)
diff --git a/tools/perf/util/annotate-arch/annotate-arm64.c b/tools/perf/util/annotate-arch/annotate-arm64.c
index 6b954bbfaf8d..89b6b596f984 100644
--- a/tools/perf/util/annotate-arch/annotate-arm64.c
+++ b/tools/perf/util/annotate-arch/annotate-arm64.c
@@ -378,6 +378,26 @@ static void update_insn_state_arm64(struct type_state *state,
pr_debug_dtp("add [%x] global addr=%"PRIx64" -> reg%d\n",
insn_offset, tsr->addr, sreg);
+ return;
+ }
+
+ /* Handle per-cpu base addresses */
+ if (dst_tsr.kind == TSR_KIND_PERCPU_BASE) {
+ if (!dst->multi_regs || !has_reg_type(state, dst->reg2) ||
+ state->regs[dst->reg2].kind != TSR_KIND_GLOBAL_ADDR ||
+ !state->regs[dst->reg2].ok)
+ return;
+
+ /* Inherit type from the global variable */
+ tsr->type = state->regs[dst->reg2].type;
+ tsr->kind = state->regs[dst->reg2].kind;
+ tsr->offset = state->regs[dst->reg2].offset;
+ tsr->addr = state->regs[dst->reg2].addr;
+ tsr->ok = true;
+
+ pr_debug_dtp("add [%x] percpu %#"PRIx64" -> reg%d",
+ insn_offset, tsr->addr, sreg);
+ pr_debug_type_name(&tsr->type, tsr->kind);
}
return;
@@ -491,6 +511,15 @@ static void update_insn_state_arm64(struct type_state *state,
u64 ip = dloc->ms->sym->start + dl->al.offset;
u64 addr = dst_tsr.addr + dst->offset;
int offset;
+ u8 kind;
+ const char *var_name = NULL;
+
+ /* it might be per-cpu offset */
+ if (get_global_var_info(dloc, addr, &var_name, &offset) &&
+ !strcmp(var_name, "__per_cpu_offset"))
+ kind = TSR_KIND_PERCPU_BASE;
+ else
+ kind = TSR_KIND_TYPE;
if (!get_global_var_type(cu_die, dloc, ip, addr, &offset,
&type_die) ||
@@ -500,13 +529,49 @@ static void update_insn_state_arm64(struct type_state *state,
}
tsr->type = type_die;
- tsr->kind = TSR_KIND_TYPE;
+ tsr->kind = kind;
tsr->offset = offset;
- tsr->addr = addr;
+ tsr->addr = 0;
tsr->ok = true;
+
pr_debug_dtp("ldr [%x] global (%"PRIx64") -> reg%d",
insn_offset, addr, sreg);
pr_debug_type_name(&tsr->type, tsr->kind);
+ return;
+ }
+
+ /* Or check if it's a per-cpu base address */
+ if (dst_tsr.kind == TSR_KIND_PERCPU_BASE) {
+ u64 ip = dloc->ms->sym->start + dl->al.offset;
+ u64 addr;
+ int offset;
+ /*
+ * If reg2 is a global variable, this means reg1 is
+ * an index into the variable's per-cpu array, so
+ * dereference type from reg2.
+ */
+ if (!dst->multi_regs || !has_reg_type(state, dst->reg2) ||
+ state->regs[dst->reg2].kind != TSR_KIND_GLOBAL_ADDR ||
+ !state->regs[dst->reg2].ok)
+ return;
+
+ addr = state->regs[dst->reg2].addr;
+ if (!get_global_var_type(cu_die, dloc, ip, addr, &offset,
+ &type_die) ||
+ !die_get_member_type(&type_die, offset, &type_die)) {
+ tsr->ok = false;
+ return;
+ }
+
+ tsr->type = type_die;
+ tsr->kind = TSR_KIND_TYPE;
+ tsr->offset = offset;
+ tsr->addr = 0;
+ tsr->ok = true;
+
+ pr_debug_dtp("ldr [%x] percpu (reg%d, reg%d) -> reg%d",
+ insn_offset, dreg, dst->reg2, sreg);
+ pr_debug_type_name(&tsr->type, tsr->kind);
}
return;
}
diff --git a/tools/perf/util/annotate-data.c b/tools/perf/util/annotate-data.c
index b75d50b2c46f..7161417d1c76 100644
--- a/tools/perf/util/annotate-data.c
+++ b/tools/perf/util/annotate-data.c
@@ -1230,20 +1230,37 @@ static enum type_match_result check_matching_type(struct type_state *state,
}
if (state->regs[reg].kind == TSR_KIND_PERCPU_BASE) {
- u64 var_addr = dloc->op->offset;
+ u64 var_addr;
int var_offset;
pr_debug_dtp("percpu var");
- if (dloc->op->multi_regs) {
- int reg2 = dloc->op->reg2;
+ if (arch__is_arm64(dloc->arch)) {
+ int reg2;
- if (dloc->op->reg2 == reg)
- reg2 = dloc->op->reg1;
+ if (!dloc->op->multi_regs)
+ return PERF_TMR_BAIL_OUT;
- if (has_reg_type(state, reg2) && state->regs[reg2].ok &&
- state->regs[reg2].kind == TSR_KIND_CONST)
- var_addr += state->regs[reg2].imm_value;
+ reg2 = dloc->op->reg2;
+ if (!has_reg_type(state, reg2) ||
+ state->regs[reg2].kind != TSR_KIND_GLOBAL_ADDR ||
+ !state->regs[reg2].ok)
+ return PERF_TMR_BAIL_OUT;
+
+ var_addr = state->regs[reg2].addr;
+ } else {
+ var_addr = dloc->op->offset;
+
+ if (dloc->op->multi_regs) {
+ int reg2 = dloc->op->reg2;
+
+ if (dloc->op->reg2 == reg)
+ reg2 = dloc->op->reg1;
+
+ if (has_reg_type(state, reg2) && state->regs[reg2].ok &&
+ state->regs[reg2].kind == TSR_KIND_CONST)
+ var_addr += state->regs[reg2].imm_value;
+ }
}
if (get_global_var_type(cu_die, dloc, dloc->ip, var_addr,
--
2.34.1