[PATCH v1 01/58] perf inject: Fix itrace branch stack synthesis

From: Ian Rogers

Date: Sun Apr 19 2026 - 19:59:35 EST


When using "perf inject --itrace=L" to synthesize branch stacks from
AUX data, several issues caused failures:

1. The synthesized samples were delivered without the
PERF_SAMPLE_BRANCH_STACK flag if it was not in the original event's
sample_type. Fixed by passing sample_type | evsel->synth_sample_type
to intel_pt_deliver_synth_event from intel_pt_do_synth_pebs_sample.

2. The record layout was misaligned because of inconsistent handling
of PERF_SAMPLE_BRANCH_HW_INDEX. Fixed by explicitly writing nr and
hw_idx in perf_event__synthesize_sample.

3. Modifying evsel->core.attr.sample_type early in __cmd_inject caused
parse failures for subsequent records in the input file. Fixed by
moving this modification to just before writing the header.

4. perf_event__repipe_sample now synthesizes samples only when branch
stack injection is requested (last_branch or add_last_branch).
Otherwise it continues to use perf_inject__cut_auxtrace_sample,
preserving the existing functionality.

Assisted-by: Gemini:gemini-3.1-pro-preview
Signed-off-by: Ian Rogers <irogers@xxxxxxxxxx>
---
tools/perf/builtin-inject.c | 63 +++++++++++++++++++++++++++++-
tools/perf/util/intel-pt.c | 3 +-
tools/perf/util/synthetic-events.c | 6 +--
3 files changed, 66 insertions(+), 6 deletions(-)

diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index f174bc69cec4..9da334740017 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -375,7 +375,52 @@ static int perf_event__repipe_sample(const struct perf_tool *tool,

build_id__mark_dso_hit(tool, event, sample, evsel, machine);

- if (inject->itrace_synth_opts.set && sample->aux_sample.size) {
+ if (inject->itrace_synth_opts.set &&
+ (inject->itrace_synth_opts.last_branch ||
+ inject->itrace_synth_opts.add_last_branch)) {
+ union perf_event *event_copy = (void *)inject->event_copy;
+ struct branch_stack dummy_bs = { .nr = 0 };
+ int err;
+ size_t sz;
+ u64 orig_type = evsel->core.attr.sample_type;
+ u64 orig_branch_type = evsel->core.attr.branch_sample_type;
+
+ if (event_copy == NULL) {
+ inject->event_copy = malloc(PERF_SAMPLE_MAX_SIZE);
+ if (!inject->event_copy)
+ return -ENOMEM;
+
+ event_copy = (void *)inject->event_copy;
+ }
+
+ if (!sample->branch_stack)
+ sample->branch_stack = &dummy_bs;
+
+ if (inject->itrace_synth_opts.add_last_branch) {
+ /* Temporarily add in type bits for synthesis. */
+ evsel->core.attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
+ evsel->core.attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
+ evsel->core.attr.sample_type &= ~PERF_SAMPLE_AUX;
+ }
+
+ sz = perf_event__sample_event_size(sample, evsel->core.attr.sample_type,
+ evsel->core.attr.read_format);
+
+ event_copy->header.type = PERF_RECORD_SAMPLE;
+ event_copy->header.size = sz;
+
+ err = perf_event__synthesize_sample(event_copy, evsel->core.attr.sample_type,
+ evsel->core.attr.read_format, sample);
+
+ evsel->core.attr.sample_type = orig_type;
+ evsel->core.attr.branch_sample_type = orig_branch_type;
+
+ if (err) {
+ pr_err("Failed to synthesize sample\n");
+ return err;
+ }
+ event = event_copy;
+ } else if (inject->itrace_synth_opts.set && sample->aux_sample.size) {
event = perf_inject__cut_auxtrace_sample(inject, event, sample);
if (IS_ERR(event))
return PTR_ERR(event);
@@ -2434,12 +2479,26 @@ static int __cmd_inject(struct perf_inject *inject)
* synthesized hardware events, so clear the feature flag.
*/
if (inject->itrace_synth_opts.set) {
+ struct evsel *evsel;
+
perf_header__clear_feat(&session->header,
HEADER_AUXTRACE);
+
+ evlist__for_each_entry(session->evlist, evsel) {
+ evsel->core.attr.sample_type &= ~PERF_SAMPLE_AUX;
+ }
+
if (inject->itrace_synth_opts.last_branch ||
- inject->itrace_synth_opts.add_last_branch)
+ inject->itrace_synth_opts.add_last_branch) {
perf_header__set_feat(&session->header,
HEADER_BRANCH_STACK);
+
+ evlist__for_each_entry(session->evlist, evsel) {
+ evsel->core.attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
+ evsel->core.attr.branch_sample_type |=
+ PERF_SAMPLE_BRANCH_HW_INDEX;
+ }
+ }
}

/*
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index fc9eec8b54b8..b9fcf3b457b0 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -2557,7 +2557,8 @@ static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evse
sample.transaction = txn;
}

- ret = intel_pt_deliver_synth_event(pt, event, &sample, sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ sample_type | evsel->synth_sample_type);
perf_sample__exit(&sample);
return ret;
}
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index 85bee747f4cd..33b530b73796 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -1719,9 +1719,9 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo

if (type & PERF_SAMPLE_BRANCH_STACK) {
sz = sample->branch_stack->nr * sizeof(struct branch_entry);
- /* nr, hw_idx */
- sz += 2 * sizeof(u64);
- memcpy(array, sample->branch_stack, sz);
+ *array++ = sample->branch_stack->nr;
+ *array++ = sample->branch_stack->hw_idx;
+ memcpy(array, sample->branch_stack->entries, sz);
array = (void *)array + sz;
}

--
2.54.0.rc1.513.gad8abe7a5a-goog