[PATCH v3a 5/7] perf: Unify perf_event_free_task() / perf_event_exit_task_context()
From: Peter Zijlstra
Date: Mon Mar 10 2025 - 11:36:27 EST
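Both perf_event_free_task() and perf_event_exit_task_context() are
very similar, except perf_event_exit_task_context() is a little more
generic / makes fewer assumptions.

Unify them by giving perf_event_exit_task_context() an 'exit' argument
and having perf_event_free_task() call it with exit == false. In that
case the perf_event_task() call is skipped and, before the final
put_ctx(), we wait for all other references on the context to be
dropped.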
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
kernel/events/core.c | 90 ++++++++++++---------------------------------------
1 file changed, 22 insertions(+), 68 deletions(-)
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -13546,13 +13546,11 @@ perf_event_exit_event(struct perf_event
perf_event_wakeup(event);
}
-static void perf_event_exit_task_context(struct task_struct *child)
+static void perf_event_exit_task_context(struct task_struct *child, bool exit)
{
struct perf_event_context *child_ctx, *clone_ctx = NULL;
struct perf_event *child_event, *next;
- WARN_ON_ONCE(child != current);
-
child_ctx = perf_pin_task_context(child);
if (!child_ctx)
return;
@@ -13597,13 +13595,31 @@ static void perf_event_exit_task_context
* won't get any samples after PERF_RECORD_EXIT. We can however still
* get a few PERF_RECORD_READ events.
*/
- perf_event_task(child, child_ctx, 0);
+ if (exit)
+ perf_event_task(child, child_ctx, 0);
list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry)
perf_event_exit_event(child_event, child_ctx);
mutex_unlock(&child_ctx->mutex);
+ if (!exit) {
+ /*
+ * perf_event_release_kernel() could still have a reference on
+ * this context. In that case we must wait for these events to
+ * have been freed (in particular all their references to this
+ * task must've been dropped).
+ *
+ * Without this copy_process() will unconditionally free this
+ * task (irrespective of its reference count) and
+ * _free_event()'s put_task_struct(event->hw.target) will be a
+ * use-after-free.
+ *
+ * Wait for all events to drop their context reference.
+ */
+ wait_var_event(&child_ctx->refcount,
+ refcount_read(&child_ctx->refcount) == 1);
+ }
put_ctx(child_ctx);
}
@@ -13631,7 +13647,7 @@ void perf_event_exit_task(struct task_st
}
mutex_unlock(&child->perf_event_mutex);
- perf_event_exit_task_context(child);
+ perf_event_exit_task_context(child, true);
/*
* The perf_event_exit_task_context calls perf_event_task
@@ -13642,27 +13658,6 @@ void perf_event_exit_task(struct task_st
perf_event_task(child, NULL, 0);
}
-static void perf_free_event(struct perf_event *event,
- struct perf_event_context *ctx)
-{
- struct perf_event *parent = event->parent;
-
- if (WARN_ON_ONCE(!parent))
- return;
-
- mutex_lock(&parent->child_mutex);
- list_del_init(&event->child_list);
- mutex_unlock(&parent->child_mutex);
-
- put_event(parent);
-
- raw_spin_lock_irq(&ctx->lock);
- perf_group_detach(event);
- list_del_event(event, ctx);
- raw_spin_unlock_irq(&ctx->lock);
- free_event(event);
-}
-
/*
* Free a context as created by inheritance by perf_event_init_task() below,
* used by fork() in case of fail.
@@ -13672,48 +13667,7 @@ static void perf_free_event(struct perf_
*/
void perf_event_free_task(struct task_struct *task)
{
- struct perf_event_context *ctx;
- struct perf_event *event, *tmp;
-
- ctx = rcu_access_pointer(task->perf_event_ctxp);
- if (!ctx)
- return;
-
- mutex_lock(&ctx->mutex);
- raw_spin_lock_irq(&ctx->lock);
- /*
- * Destroy the task <-> ctx relation and mark the context dead.
- *
- * This is important because even though the task hasn't been
- * exposed yet the context has been (through child_list).
- */
- RCU_INIT_POINTER(task->perf_event_ctxp, NULL);
- WRITE_ONCE(ctx->task, TASK_TOMBSTONE);
- put_task_struct(task); /* cannot be last */
- raw_spin_unlock_irq(&ctx->lock);
-
-
- list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry)
- perf_free_event(event, ctx);
-
- mutex_unlock(&ctx->mutex);
-
- /*
- * perf_event_release_kernel() could've stolen some of our
- * child events and still have them on its free_list. In that
- * case we must wait for these events to have been freed (in
- * particular all their references to this task must've been
- * dropped).
- *
- * Without this copy_process() will unconditionally free this
- * task (irrespective of its reference count) and
- * _free_event()'s put_task_struct(event->hw.target) will be a
- * use-after-free.
- *
- * Wait for all events to drop their context reference.
- */
- wait_var_event(&ctx->refcount, refcount_read(&ctx->refcount) == 1);
- put_ctx(ctx); /* must be last */
+ perf_event_exit_task_context(task, false);
}
void perf_event_delayed_put(struct task_struct *task)