Re: [PATCH V8 2/6] perf: attach/detach PMU specific data
From: Liang, Kan
Date: Wed Mar 12 2025 - 15:53:35 EST
On 2025-03-12 3:18 p.m., Peter Zijlstra wrote:
> On Wed, Mar 12, 2025 at 11:25:21AM -0700, kan.liang@xxxxxxxxxxxxxxx wrote:
>
>> +static int
>> +attach_global_ctx_data(struct kmem_cache *ctx_cache)
>> +{
>> + if (refcount_inc_not_zero(&global_ctx_data_ref))
>> + return 0;
>> +
>> + percpu_down_write(&global_ctx_data_rwsem);
>> + if (!refcount_inc_not_zero(&global_ctx_data_ref)) {
>> + struct task_struct *g, *p;
>> + struct perf_ctx_data *cd;
>> + int ret;
>> +
>> +again:
>> + /* Allocate everything */
>> + rcu_read_lock();
>> + for_each_process_thread(g, p) {
>> + cd = rcu_dereference(p->perf_ctx_data);
>> + if (cd && !cd->global) {
>> + cd->global = 1;
>> + if (!refcount_inc_not_zero(&cd->refcount))
>> + cd = NULL;
>> + }
>> + if (!cd) {
>> + get_task_struct(p);
>> + rcu_read_unlock();
>> +
>> + ret = attach_task_ctx_data(p, ctx_cache, true);
>> + put_task_struct(p);
>> + if (ret) {
>> + __detach_global_ctx_data();
>> + return ret;
>
> AFAICT this returns with global_ctx_data_rwsem taken, no?
Ah, yes
>
>> + }
>> + goto again;
>> + }
>> + }
>> + rcu_read_unlock();
>> +
>> + refcount_set(&global_ctx_data_ref, 1);
>> + }
>> + percpu_up_write(&global_ctx_data_rwsem);
>> +
>> + return 0;
>> +}
>
> Can we rework this with guards? A little something like so?
>
Yes. I will do more test and send a V9.
Thanks,
Kan
> ---
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -5233,18 +5233,20 @@ static refcount_t global_ctx_data_ref;
> static int
> attach_global_ctx_data(struct kmem_cache *ctx_cache)
> {
> + struct task_struct *g, *p;
> + struct perf_ctx_data *cd;
> + int ret;
> +
> if (refcount_inc_not_zero(&global_ctx_data_ref))
> return 0;
>
> - percpu_down_write(&global_ctx_data_rwsem);
> - if (!refcount_inc_not_zero(&global_ctx_data_ref)) {
> - struct task_struct *g, *p;
> - struct perf_ctx_data *cd;
> - int ret;
> + guard(percpu_write)(&global_ctx_data_rwsem);
> + if (refcount_inc_not_zero(&global_ctx_data_ref))
> + return 0;
>
> again:
> - /* Allocate everything */
> - rcu_read_lock();
> + /* Allocate everything */
> + scoped_guard (rcu) {
> for_each_process_thread(g, p) {
> cd = rcu_dereference(p->perf_ctx_data);
> if (cd && !cd->global) {
> @@ -5254,24 +5256,23 @@ attach_global_ctx_data(struct kmem_cache
> }
> if (!cd) {
> get_task_struct(p);
> - rcu_read_unlock();
> -
> - ret = attach_task_ctx_data(p, ctx_cache, true);
> - put_task_struct(p);
> - if (ret) {
> - __detach_global_ctx_data();
> - return ret;
> - }
> - goto again;
> + goto alloc;
> }
> }
> - rcu_read_unlock();
> -
> - refcount_set(&global_ctx_data_ref, 1);
> }
> - percpu_up_write(&global_ctx_data_rwsem);
> +
> + refcount_set(&global_ctx_data_ref, 1);
>
> return 0;
> +
> +alloc:
> + ret = attach_task_ctx_data(p, ctx_cache, true);
> + put_task_struct(p);
> + if (ret) {
> + __detach_global_ctx_data();
> + return ret;
> + }
> + goto again;
> }
>
> static int
> @@ -5338,15 +5339,12 @@ static void detach_global_ctx_data(void)
> if (refcount_dec_not_one(&global_ctx_data_ref))
> return;
>
> - percpu_down_write(&global_ctx_data_rwsem);
> + guard(percpu_write)(&global_ctx_data_rwsem);
> if (!refcount_dec_and_test(&global_ctx_data_ref))
> - goto unlock;
> + return;
>
> /* remove everything */
> __detach_global_ctx_data();
> -
> -unlock:
> - percpu_up_write(&global_ctx_data_rwsem);
> }
>
> static void detach_perf_ctx_data(struct perf_event *event)
> @@ -8776,9 +8774,9 @@ perf_event_alloc_task_data(struct task_s
> if (!ctx_cache)
> return;
>
> - percpu_down_read(&global_ctx_data_rwsem);
> + guard(percpu_read)(&global_ctx_data_rwsem);
> + guard(rcu)();
>
> - rcu_read_lock();
> cd = rcu_dereference(child->perf_ctx_data);
>
> if (!cd) {
> @@ -8787,21 +8785,16 @@ perf_event_alloc_task_data(struct task_s
> * when attaching the perf_ctx_data.
> */
> if (!refcount_read(&global_ctx_data_ref))
> - goto rcu_unlock;
> + return;
> rcu_read_unlock();
> attach_task_ctx_data(child, ctx_cache, true);
> - goto up_rwsem;
> + return;
> }
>
> if (!cd->global) {
> cd->global = 1;
> refcount_inc(&cd->refcount);
> }
> -
> -rcu_unlock:
> - rcu_read_unlock();
> -up_rwsem:
> - percpu_up_read(&global_ctx_data_rwsem);
> }
>
> void perf_event_fork(struct task_struct *task)
> @@ -13845,9 +13838,8 @@ void perf_event_exit_task(struct task_st
> /*
> * Detach the perf_ctx_data for the system-wide event.
> */
> - percpu_down_read(&global_ctx_data_rwsem);
> + guard(percpu_read)(&global_ctx_data_rwsem);
> detach_task_ctx_data(child);
> - percpu_up_read(&global_ctx_data_rwsem);
> }
>
> static void perf_free_event(struct perf_event *event,
> diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
> index c012df33a9f0..36f3082f2d82 100644
> --- a/include/linux/percpu-rwsem.h
> +++ b/include/linux/percpu-rwsem.h
> @@ -8,6 +8,7 @@
> #include <linux/wait.h>
> #include <linux/rcu_sync.h>
> #include <linux/lockdep.h>
> +#include <linux/cleanup.h>
>
> struct percpu_rw_semaphore {
> struct rcu_sync rss;
> @@ -125,6 +126,13 @@ extern bool percpu_is_read_locked(struct percpu_rw_semaphore *);
> extern void percpu_down_write(struct percpu_rw_semaphore *);
> extern void percpu_up_write(struct percpu_rw_semaphore *);
>
> +DEFINE_GUARD(percpu_read, struct percpu_rw_semaphore *,
> + percpu_down_read(_T), percpu_up_read(_T))
> +DEFINE_GUARD_COND(percpu_read, _try, percpu_down_read_trylock(_T))
> +
> +DEFINE_GUARD(percpu_write, struct percpu_rw_semaphore *,
> + percpu_down_write(_T), percpu_up_write(_T))
> +
> static inline bool percpu_is_write_locked(struct percpu_rw_semaphore *sem)
> {
> return atomic_read(&sem->block);
>