[PATCH 05/28] perf session: Fix PERF_RECORD_READ swap and dump for variable-length events
From: Arnaldo Carvalho de Melo
Date: Sat May 09 2026 - 23:36:57 EST
From: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
The kernel dynamically sizes PERF_RECORD_READ based on
attr.read_format: only the fields enabled by PERF_FORMAT_TOTAL_TIME_ENABLED,
PERF_FORMAT_TOTAL_TIME_RUNNING, PERF_FORMAT_ID, and PERF_FORMAT_LOST
are emitted, packed with no gaps.
perf_event__read_swap() unconditionally byte-swapped time_enabled,
time_running, and id at their fixed struct offsets, causing
out-of-bounds access on smaller events and swapping the wrong
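bytes when not all format fields are present. It also ran
swap_sample_id_all() from the fixed &event->read + 1 offset, which
is not where the sample_id_all fields start when the event is
shorter than the full struct.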
Replace the individual field swaps with a single mem_bswap_64()
over the entire tail from value onward. Since every field after
pid/tid is u64 regardless of which combination is present, this
correctly handles any read_format combination and any trailing
sample_id_all fields.
Similarly, dump_read() accessed optional fields via fixed struct
offsets, displaying values from wrong positions when not all
format bits are set. Walk the packed u64 array sequentially
instead, with bounds checks against event->header.size.
Assisted-by: Claude Opus 4.6 (1M context) <noreply@xxxxxxxxxxxxx>
Signed-off-by: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
---
tools/perf/util/session.c | 61 +++++++++++++++++++++++++++------------
1 file changed, 43 insertions(+), 18 deletions(-)
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index aae0651fb6f025a1..20b70d6fb7cc8ed4 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -354,17 +354,22 @@ static void perf_event__task_swap(union perf_event *event, bool sample_id_all)
swap_sample_id_all(event, &event->fork + 1);
}
-static void perf_event__read_swap(union perf_event *event, bool sample_id_all)
+static void perf_event__read_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
{
- event->read.pid = bswap_32(event->read.pid);
- event->read.tid = bswap_32(event->read.tid);
- event->read.value = bswap_64(event->read.value);
- event->read.time_enabled = bswap_64(event->read.time_enabled);
- event->read.time_running = bswap_64(event->read.time_running);
- event->read.id = bswap_64(event->read.id);
+ size_t tail;
- if (sample_id_all)
- swap_sample_id_all(event, &event->read + 1);
+ event->read.pid = bswap_32(event->read.pid);
+ event->read.tid = bswap_32(event->read.tid);
+ /*
+ * Everything after pid/tid is u64: the read values (variable
+ * set determined by attr.read_format, which we don't have
+ * here) optionally followed by sample_id_all fields.
+ * Since all are u64, swap the entire remaining tail at once.
+ */
+ tail = event->header.size - offsetof(struct perf_record_read, value);
+ tail &= ~(size_t)(sizeof(__u64) - 1);
+ mem_bswap_64(&event->read.value, tail);
}
static void perf_event__aux_swap(union perf_event *event, bool sample_id_all)
@@ -1198,8 +1203,9 @@ static void dump_deferred_callchain(struct evsel *evsel, union perf_event *event
static void dump_read(struct evsel *evsel, union perf_event *event)
{
- struct perf_record_read *read_event = &event->read;
u64 read_format;
+ __u64 *array;
+ void *end;
if (!dump_trace)
return;
@@ -1211,18 +1217,37 @@ static void dump_read(struct evsel *evsel, union perf_event *event)
return;
read_format = evsel->core.attr.read_format;
+ /*
+ * The kernel packs only the enabled read_format fields
+ * after value, with no gaps. Walk the packed array
+ * instead of using fixed struct offsets.
+ */
+ array = &event->read.value + 1;
+ end = (void *)event + event->header.size;
- if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
- printf("... time enabled : %" PRI_lu64 "\n", read_event->time_enabled);
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+ if ((void *)(array + 1) > end)
+ return;
+ printf("... time enabled : %" PRI_lu64 "\n", *array++);
+ }
- if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
- printf("... time running : %" PRI_lu64 "\n", read_event->time_running);
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+ if ((void *)(array + 1) > end)
+ return;
+ printf("... time running : %" PRI_lu64 "\n", *array++);
+ }
- if (read_format & PERF_FORMAT_ID)
- printf("... id : %" PRI_lu64 "\n", read_event->id);
+ if (read_format & PERF_FORMAT_ID) {
+ if ((void *)(array + 1) > end)
+ return;
+ printf("... id : %" PRI_lu64 "\n", *array++);
+ }
- if (read_format & PERF_FORMAT_LOST)
- printf("... lost : %" PRI_lu64 "\n", read_event->lost);
+ if (read_format & PERF_FORMAT_LOST) {
+ if ((void *)(array + 1) > end)
+ return;
+ printf("... lost : %" PRI_lu64 "\n", *array++);
+ }
}
static struct machine *machines__find_for_cpumode(struct machines *machines,
--
2.54.0