Re: [PATCH 3/4] perf arm_spe: Decode Arm N1 IMPDEF events
From: James Clark
Date: Tue Apr 07 2026 - 08:40:27 EST
On 02/04/2026 4:26 pm, Ian Rogers wrote:
On Wed, Apr 1, 2026 at 7:26 AM James Clark <james.clark@xxxxxxxxxx> wrote:
From the TRM [1], N1 has one IMPDEF event which isn't covered by the
common list. Add a framework so that more cores can be added in the
future and that the N1 IMPDEF event can be decoded. Also increase the
size of the buffer because we're adding more strings and if it gets
truncated it falls back to a hex dump only.
[1]: https://developer.arm.com/documentation/100616/0401/Statistical-Profiling-Extension/implementation-defined-features-of-SPE
Suggested-by: Al Grant <al.grant@xxxxxxx>
Signed-off-by: James Clark <james.clark@xxxxxxxxxx>
---
tools/perf/util/arm-spe-decoder/Build | 2 +
.../util/arm-spe-decoder/arm-spe-pkt-decoder.c | 45 ++++++++++++++++++++--
.../util/arm-spe-decoder/arm-spe-pkt-decoder.h | 5 ++-
tools/perf/util/arm-spe.c | 13 ++++---
4 files changed, 54 insertions(+), 11 deletions(-)
diff --git a/tools/perf/util/arm-spe-decoder/Build b/tools/perf/util/arm-spe-decoder/Build
index ab500e0efe24..97a298d1e279 100644
--- a/tools/perf/util/arm-spe-decoder/Build
+++ b/tools/perf/util/arm-spe-decoder/Build
@@ -1 +1,3 @@
perf-util-y += arm-spe-pkt-decoder.o arm-spe-decoder.o
+
+CFLAGS_arm-spe-pkt-decoder.o += -I$(srctree)/tools/arch/arm64/include/ -I$(OUTPUT)arch/arm64/include/generated/
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index c880b0dec3a1..42a7501d4dfe 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -15,6 +15,8 @@
#include "arm-spe-pkt-decoder.h"
+#include "../../arm64/include/asm/cputype.h"
Sashiko spotted:
https://sashiko.dev/#/patchset/20260401-james-spe-impdef-decode-v1-0-ad0d372c220c%40linaro.org
"""
This isn't a bug, but does this include directive rely on accidental
path normalization?
The relative path ../../arm64/include/asm/cputype.h does not exist relative
to arm-spe-pkt-decoder.c. It only compiles because the Build file adds
-I$(srctree)/tools/arch/arm64/include/ to CFLAGS.
Would it be cleaner to use #include <asm/cputype.h> to explicitly rely on
the include path?
[ ... ]
"""
I wouldn't use <asm/cputype.h> due to cross-compilation and the like,
instead just add the extra "../" into the include path.
Do you mean change the #include to this?
#include "../../../arm64/include/asm/cputype.h"
I still need to add:
CFLAGS_arm-spe-pkt-decoder.o += -I$(srctree)/tools/arch/arm64/include/
To make the this include in cputype.h work:
#include <asm/sysreg.h>
Which probably only works because there isn't a sysreg.h on other architectures. But I'm not sure what the significance of ../../ vs ../../../ is if either compile? arm-spe.c already does it with ../../ which is what I copied.
+
static const char * const arm_spe_packet_name[] = {
[ARM_SPE_PAD] = "PAD",
[ARM_SPE_END] = "END",
@@ -307,6 +309,11 @@ static const struct ev_string common_ev_strings[] = {
{ .event = 0, .desc = NULL },
};
+static const struct ev_string n1_event_strings[] = {
+ { .event = 12, .desc = "LATE-PREFETCH" },
+ { .event = 0, .desc = NULL },
+};
+
static u64 print_event_list(int *err, char **buf, size_t *buf_len,
const struct ev_string *ev_strings, u64 payload)
{
@@ -318,14 +325,44 @@ static u64 print_event_list(int *err, char **buf, size_t *buf_len,
return payload;
}
+struct event_print_handle {
+ const struct midr_range *midr_ranges;
+ const struct ev_string *ev_strings;
+};
+
+#define EV_PRINT(range, strings) \
+ { \
+ .midr_ranges = range, \
+ .ev_strings = strings, \
+ }
+
+static const struct midr_range n1_event_encoding_cpus[] = {
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
+ {},
+};
+
+static const struct event_print_handle event_print_handles[] = {
+ EV_PRINT(n1_event_encoding_cpus, n1_event_strings),
+};
+
static int arm_spe_pkt_desc_event(const struct arm_spe_pkt *packet,
- char *buf, size_t buf_len)
+ char *buf, size_t buf_len, u64 midr)
{
u64 payload = packet->payload;
int err = 0;
arm_spe_pkt_out_string(&err, &buf, &buf_len, "EV");
- print_event_list(&err, &buf, &buf_len, common_ev_strings, payload);
+ payload = print_event_list(&err, &buf, &buf_len, common_ev_strings,
+ payload);
+
+ /* Try to decode IMPDEF bits for known CPUs */
+ for (unsigned int i = 0; i < ARRAY_SIZE(event_print_handles); i++) {
+ if (is_midr_in_range_list(midr,
+ event_print_handles[i].midr_ranges))
+ payload = print_event_list(&err, &buf, &buf_len,
+ event_print_handles[i].ev_strings,
+ payload);
+ }
return err;
}
@@ -506,7 +543,7 @@ static int arm_spe_pkt_desc_counter(const struct arm_spe_pkt *packet,
}
int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf,
- size_t buf_len)
+ size_t buf_len, u64 midr)
{
int idx = packet->index;
unsigned long long payload = packet->payload;
@@ -522,7 +559,7 @@ int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf,
arm_spe_pkt_out_string(&err, &buf, &blen, "%s", name);
break;
case ARM_SPE_EVENTS:
- err = arm_spe_pkt_desc_event(packet, buf, buf_len);
+ err = arm_spe_pkt_desc_event(packet, buf, buf_len, midr);
break;
case ARM_SPE_OP_TYPE:
err = arm_spe_pkt_desc_op_type(packet, buf, buf_len);
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
index adf4cde320aa..17b067fe3c87 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
@@ -11,7 +11,7 @@
#include <stddef.h>
#include <stdint.h>
-#define ARM_SPE_PKT_DESC_MAX 256
+#define ARM_SPE_PKT_DESC_MAX 512
#define ARM_SPE_NEED_MORE_BYTES -1
#define ARM_SPE_BAD_PACKET -2
@@ -186,5 +186,6 @@ const char *arm_spe_pkt_name(enum arm_spe_pkt_type);
int arm_spe_get_packet(const unsigned char *buf, size_t len,
struct arm_spe_pkt *packet);
-int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, size_t len);
+int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, size_t len,
+ u64 midr);
#endif
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 7447b000f9cd..46f0309c092b 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -135,7 +135,7 @@ struct data_source_handle {
}
static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
- unsigned char *buf, size_t len)
+ unsigned char *buf, size_t len, u64 midr)
{
struct arm_spe_pkt packet;
size_t pos = 0;
@@ -161,7 +161,7 @@ static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
color_fprintf(stdout, color, " ");
if (ret > 0) {
ret = arm_spe_pkt_desc(&packet, desc,
- ARM_SPE_PKT_DESC_MAX);
+ ARM_SPE_PKT_DESC_MAX, midr);
if (!ret)
color_fprintf(stdout, color, " %s\n", desc);
} else {
@@ -174,10 +174,10 @@ static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
}
static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
- size_t len)
+ size_t len, u64 midr)
{
printf(".\n");
- arm_spe_dump(spe, buf, len);
+ arm_spe_dump(spe, buf, len, midr);
}
static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
@@ -1469,8 +1469,11 @@ static int arm_spe_process_auxtrace_event(struct perf_session *session,
/* Dump here now we have copied a piped trace out of the pipe */
if (dump_trace) {
if (auxtrace_buffer__get_data(buffer, fd)) {
+ u64 midr = 0;
+
+ arm_spe__get_midr(spe, buffer->cpu.cpu, &midr);
Sashiko claims to have spotted an issue here:
"""
Is it possible for arm_spe__get_midr() to cause a segmentation fault here?
If the trace is from an older recording (metadata version 1) and the
environment lacks a CPUID string (such as during cross-architecture
analysis), perf_env__cpuid() returns NULL.
It appears arm_spe__get_midr() then passes this NULL pointer to
strtol(cpuid, NULL, 16), which leads to undefined behavior.
"""
But this feels like, if this happens you're already having a bad time
and these changes aren't necessarily making things worse.
Thanks,
Ian
Yeah I think it might be possible so I can add an error instead of a segfault. I'll check the rest of the Sashiko comments too.
arm_spe_dump_event(spe, buffer->data,
- buffer->size);
+ buffer->size, midr);
auxtrace_buffer__put_data(buffer);
}
}
--
2.34.1