[PATCH] perf/core: Fix warning due to unordered pmu_ctx_list
From: Luo Gengkun
Date: Mon Jan 20 2025 - 06:35:48 EST
Syzkaller triggers a warning due to prev_epc->pmu != next_epc->pmu in
perf_event_swap_task_ctx_data(). The vmcore shows that the two lists have
the same perf_event_pmu_contexts, but not in the same order.
The problem is that when inheritance is performed, it traverses the ordered
groups of events, and inserts the new perf_event_pmu_context into
child_ctx->pmu_ctx_list which is unordered. So the order of pmu_ctx_list in
the parent and child may be different.
The following testcase can trigger the above warning:
# perf record -e cycles --call-graph lbr -- taskset -c 3 ./a.out &
# perf stat -e cpu-clock,cs -p xxx // xxx is the pid of a.out
test.c
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

/*
 * Reproducer: print the pid, wait, then fork() so that the child inherits
 * the perf events attached to the parent. Afterwards both processes spin
 * forever to keep getting scheduled.
 *
 * Returns 1 if fork() fails; otherwise never returns.
 */
int main(void)
{
	/*
	 * volatile keeps the busy loop from being optimised away; unsigned
	 * makes the eventual wrap-around well defined.
	 */
	volatile unsigned int count = 0;
	pid_t pid;

	printf("%d running\n", (int)getpid());
	/* Presumably the window in which to attach "perf stat -p <pid>". */
	sleep(30);
	printf("running\n");

	pid = fork();
	if (pid == -1) {
		printf("fork error\n");
		return 1;
	}

	/* Parent and child run the same endless busy loop. */
	while (1)
		count++;
}
The testcase first opens an LBR event, so it will allocate task_ctx_data,
and then opens tracepoint and software events, so the parent ctx will have
3 different perf_event_pmu_contexts. When inheriting, the child ctx will
insert the perf_event_pmu_contexts in a different order, and then the
warning will trigger.
To fix this problem, add pmu_ctx_insertion_sort to make sure the
pmu_ctx_list is ordered.
Fixes: bd2756811766 ("perf: Rewrite core context handling")
Signed-off-by: Luo Gengkun <luogengkun@xxxxxxxxxxxxxxx>
---
kernel/events/core.c | 22 ++++++++++++++++++++--
1 file changed, 20 insertions(+), 2 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 95b01a51139d..1bdff3ef0ce2 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4953,6 +4953,24 @@ find_get_context(struct task_struct *task, struct perf_event *event)
return ERR_PTR(err);
}
+/*
+ * Keep ctx->pmu_ctx_list sorted by ascending epc->pmu address, so that no
+ * warning is triggered due to prev_epc->pmu != next_epc->pmu.
+ */
+static void pmu_ctx_insertion_sort(struct perf_event_pmu_context *new,
+				   struct perf_event_context *ctx)
+{
+	struct list_head *pos;
+
+	lockdep_assert_held(&ctx->lock);
+	/* Find the first entry sorting after @new, or the list head if none. */
+	list_for_each(pos, &ctx->pmu_ctx_list) {
+		if (list_entry(pos, typeof(*new), pmu_ctx_entry)->pmu > new->pmu)
+			break;
+	}
+	list_add_tail(&new->pmu_ctx_entry, pos);	/* insert before @pos */
+}
+
static struct perf_event_pmu_context *
find_get_pmu_context(struct pmu *pmu, struct perf_event_context *ctx,
struct perf_event *event)
@@ -4974,7 +4992,7 @@ find_get_pmu_context(struct pmu *pmu, struct perf_event_context *ctx,
if (!epc->ctx) {
atomic_set(&epc->refcount, 1);
epc->embedded = 1;
- list_add(&epc->pmu_ctx_entry, &ctx->pmu_ctx_list);
+ pmu_ctx_insertion_sort(epc, ctx);
epc->ctx = ctx;
} else {
WARN_ON_ONCE(epc->ctx != ctx);
@@ -5021,7 +5039,7 @@ find_get_pmu_context(struct pmu *pmu, struct perf_event_context *ctx,
printk(KERN_INFO
"lgk: ctx %p insert pmu ctx %p, pmu is %p!\n", ctx, epc, epc->pmu);
- list_add(&epc->pmu_ctx_entry, &ctx->pmu_ctx_list);
+ pmu_ctx_insertion_sort(epc, ctx);
epc->ctx = ctx;
found_epc:
--
2.34.1