[tip: perf/urgent] perf: Fix data race in perf_event_set_bpf_handler()
From: tip-bot2 for Peter Zijlstra
Date: Tue Feb 24 2026 - 07:42:01 EST
The following commit has been merged into the perf/urgent branch of tip:
Commit-ID: 5004d5c59874b18c8ecbcb507053750c8b47353c
Gitweb: https://git.kernel.org/tip/5004d5c59874b18c8ecbcb507053750c8b47353c
Author: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
AuthorDate: Tue, 24 Feb 2026 13:29:09 +01:00
Committer: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
CommitterDate: Tue, 24 Feb 2026 13:33:39 +01:00
perf: Fix data race in perf_event_set_bpf_handler()
On Fri, Jan 30, 2026 at 11:07:33AM +0100, Peter Zijlstra wrote:
> On Tue, Jan 27, 2026 at 04:37:19PM +0800, Qing Wang wrote:
> > On Tue, 27 Jan 2026 at 10:36, Henry Zhang <henryzhangjcle@xxxxxxxxx> wrote:
> > > diff --git a/kernel/events/core.c b/kernel/events/core.c
> > > index a0fa488bce84..1f3ed9e87507 100644
> > > --- a/kernel/events/core.c
> > > +++ b/kernel/events/core.c
> > > @@ -10349,7 +10349,7 @@ static inline int perf_event_set_bpf_handler(struct perf_event *event,
> > >  		return -EPROTO;
> > >  	}
> > >  
> > > -	event->prog = prog;
> > > +	WRITE_ONCE(event->prog, prog);
> > >  	event->bpf_cookie = bpf_cookie;
> > >  	return 0;
> > >  }
> > > @@ -10407,7 +10407,9 @@ static int __perf_event_overflow(struct perf_event *event,
> > >  	if (event->attr.aux_pause)
> > >  		perf_event_aux_pause(event->aux_event, true);
> > >  
> > > -	if (event->prog && event->prog->type == BPF_PROG_TYPE_PERF_EVENT &&
> > > +	struct bpf_prog *prog = READ_ONCE(event->prog);
> > > +
> > > +	if (prog && prog->type == BPF_PROG_TYPE_PERF_EVENT &&
> > >  	    !bpf_overflow_handler(event, data, regs))
> > >  		goto out;
> >
> > Looking at this code, I think there may be a serious issue: a potential
> > use-after-free (UAF) when accessing event->prog in __perf_event_overflow():
> >
> > CPU 0 (interrupt context)            CPU 1 (process context)
> > read event->prog
> >                                      perf_event_free_bpf_handler()
> >                                        put(prog)
> >                                        free(prog)
> > access memory pointed to by prog
> >
> > This scenario needs more analysis.
>
> This can only happen if the event can overlap with removal, which it
> typically cannot -- but I'll have to audit the software events.
>
> Specifically, events happen in IRQ/NMI context, and event removal
> involves an IPI to that very CPU, which by necessity will then have to
> wait for event completion.
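
To make that argument concrete, below is a minimal sketch of the pattern
being relied on. The names here are illustrative only; the real path is
perf_event_release_kernel() -> perf_remove_from_context() ->
event_function_call(), which sends the IPI via cpu_function_call() /
smp_call_function_single():

#include <linux/smp.h>		/* smp_call_function_single() */
#include <linux/perf_event.h>

/*
 * Illustrative sketch, not the actual kernel code. The removal function
 * runs in IPI context on the CPU the event is active on. That IPI cannot
 * be serviced while the target CPU is inside an IRQ/NMI overflow handler,
 * and with wait=1 the caller blocks until the handler has run. So once
 * remove_event() returns, no overflow handler on that CPU can still be
 * dereferencing event->prog.
 */
static void __remove_event(void *info)
{
	struct perf_event *event = info;

	/* IRQs are disabled here; no overflow handler is running. */
	event->state = PERF_EVENT_STATE_OFF;
}

static void remove_event(struct perf_event *event)
{
	smp_call_function_single(event->oncpu, __remove_event, event, 1);
}
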
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Link: https://patch.msgid.link/20260224122909.GV1395416@xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
---
kernel/events/core.c | 42 +++++++++++++++++++++++++++++++++++++++++-
1 file changed, 41 insertions(+), 1 deletion(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 22a0f40..1f5699b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -10777,6 +10777,13 @@ int perf_event_overflow(struct perf_event *event,
 			struct perf_sample_data *data,
 			struct pt_regs *regs)
 {
+	/*
+	 * Entry point from hardware PMI, interrupts should be disabled here.
+	 * This serializes us against perf_remove_from_context() in
+	 * things like perf_event_release_kernel().
+	 */
+	lockdep_assert_irqs_disabled();
+
 	return __perf_event_overflow(event, 1, data, regs);
 }
 
@@ -10853,6 +10860,19 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,
 {
 	struct hw_perf_event *hwc = &event->hw;
 
+	/*
+	 * This is:
+	 *  - software        preempt
+	 *  - tracepoint      preempt
+	 *  - tp_target_task  irq (ctx->lock)
+	 *  - uprobes         preempt/irq
+	 *  - kprobes         preempt/irq
+	 *  - hw_breakpoint   irq
+	 *
+	 * Any of these are sufficient to hold off RCU and thus ensure @event
+	 * exists.
+	 */
+	lockdep_assert_preemption_disabled();
 	local64_add(nr, &event->count);
 
 	if (!regs)
@@ -10861,6 +10881,16 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,
 	if (!is_sampling_event(event))
 		return;
 
+	/*
+	 * Serialize against event_function_call() IPIs like normal overflow
+	 * event handling. Specifically, must not allow
+	 * perf_event_release_kernel() -> perf_remove_from_context() to make
+	 * progress and 'release' the event from under us.
+	 */
+	guard(irqsave)();
+	if (event->state != PERF_EVENT_STATE_ACTIVE)
+		return;
+
 	if ((event->attr.sample_type & PERF_SAMPLE_PERIOD) && !event->attr.freq) {
 		data->period = nr;
 		return perf_swevent_overflow(event, 1, data, regs);
@@ -11359,6 +11389,11 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
 	struct perf_sample_data data;
 	struct perf_event *event;
 
+	/*
+	 * Per being a tracepoint, this runs with preemption disabled.
+	 */
+	lockdep_assert_preemption_disabled();
+
 	struct perf_raw_record raw = {
 		.frag = {
 			.size = entry_size,
@@ -11691,6 +11726,11 @@ void perf_bp_event(struct perf_event *bp, void *data)
 	struct perf_sample_data sample;
 	struct pt_regs *regs = data;
 
+	/*
+	 * Exception context, will have interrupts disabled.
+	 */
+	lockdep_assert_irqs_disabled();
+
 	perf_sample_data_init(&sample, bp->attr.bp_addr, 0);
 
 	if (!bp->hw.state && !perf_exclude_event(bp, regs))
@@ -12155,7 +12195,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
 
 	if (regs && !perf_exclude_event(event, regs)) {
 		if (!(event->attr.exclude_idle && is_idle_task(current)))
-			if (__perf_event_overflow(event, 1, &data, regs))
+			if (perf_event_overflow(event, &data, regs))
 				ret = HRTIMER_NORESTART;
 	}
 
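
For reference, the guard(irqsave)() used in the perf_swevent_event() hunk is
the scope-based guard from <linux/irqflags.h> (built on the cleanup
infrastructure from <linux/cleanup.h>): it performs local_irq_save() and has
the compiler run local_irq_restore() when the enclosing scope is left, so
both the early 'return' for inactive events and the normal path drop back
with interrupts restored. A minimal usage sketch follows; poke_event() is a
made-up function, not part of this patch:

#include <linux/irqflags.h>	/* guard(irqsave) */
#include <linux/perf_event.h>

static void poke_event(struct perf_event *event, u64 nr)
{
	guard(irqsave)();	/* local_irq_save() happens here ... */

	if (event->state != PERF_EVENT_STATE_ACTIVE)
		return;		/* ... local_irq_restore() runs here ... */

	local64_add(nr, &event->count);
}				/* ... or here on the normal path */

The perf_swevent_hrtimer() hunk is the same story from the other direction:
hrtimer callbacks run with interrupts disabled, and perf_event_overflow() is
just __perf_event_overflow(event, 1, ...) plus the new assertion, so routing
the hrtimer path through it puts that caller under the
lockdep_assert_irqs_disabled() check as well.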