Re: [PATCH] perf callchain: Fix suspicious RCU usage in get_callchain_entry()

From: Peter Zijlstra
Date: Mon Jul 15 2024 - 06:47:42 EST


On Mon, Jul 15, 2024 at 12:23:27PM +0200, Radoslaw Zielonek wrote:
> rcu_dereference() uses rcu_read_lock_held() as its lockdep check, but
> BPF's bpf_prog_test_run_syscall() takes rcu_read_lock_trace() instead.
> To fix this, use a check that accepts either reader lock:
> (rcu_read_lock_trace_held() || rcu_read_lock_held()).

How does that fix it? release_callchain_buffers() does call_rcu(), not
call_rcu_tasks_trace().

Does a normal RCU grace period fully imply an RCU Tasks Trace grace period?

> ---
> kernel/events/callchain.c | 11 +++++++++--
> 1 file changed, 9 insertions(+), 2 deletions(-)
>
> diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
> index 1273be84392c..a8af7cd50626 100644
> --- a/kernel/events/callchain.c
> +++ b/kernel/events/callchain.c
> @@ -11,6 +11,7 @@
> #include <linux/perf_event.h>
> #include <linux/slab.h>
> #include <linux/sched/task_stack.h>
> +#include <linux/rcupdate_trace.h>
>
> #include "internal.h"
>
> @@ -32,7 +33,7 @@ static inline size_t perf_callchain_entry__sizeof(void)
> static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]);
> static atomic_t nr_callchain_events;
> static DEFINE_MUTEX(callchain_mutex);
> -static struct callchain_cpus_entries *callchain_cpus_entries;
> +static struct callchain_cpus_entries __rcu *callchain_cpus_entries;
>
>
> __weak void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
> @@ -158,7 +159,13 @@ struct perf_callchain_entry *get_callchain_entry(int *rctx)
> if (*rctx == -1)
> return NULL;
>
> - entries = rcu_dereference(callchain_cpus_entries);
> + /*
> + * BPF may get here holding only rcu_read_lock_trace(), e.g. from
> + * bpf_prog_test_run_syscall(), so accept either reader lock.
> + */
> + entries = rcu_dereference_check(callchain_cpus_entries,
> + rcu_read_lock_trace_held() ||
> + rcu_read_lock_held());
> if (!entries) {
> put_recursion_context(this_cpu_ptr(callchain_recursion), *rctx);
> return NULL;
> --
> 2.43.0
>