[PATCH v3] perf/core: Fix refcount bug and potential UAF in perf_mmap

From: yuhaocheng035

Date: Wed Mar 25 2026 - 06:36:29 EST


From: Haocheng Yu <yuhaocheng035@xxxxxxxxx>

Syzkaller reported a refcount_t: addition on 0; use-after-free warning
in perf_mmap.

The issue is caused by a race condition between a failing mmap() setup
and a concurrent mmap() on a dependent event (e.g., using output
redirection).

In perf_mmap(), the ring_buffer (rb) is allocated and assigned to
event->rb with the mmap_mutex held. The mutex is then released to
perform map_range().

If map_range() fails, perf_mmap_close() is called to clean up.
However, since the mutex was dropped, another thread attaching to
this event (via inherited events or output redirection) can acquire
the mutex, observe the valid event->rb pointer, and attempt to
increment its reference count. If the cleanup path has already
dropped the reference count to zero, this results in a
use-after-free or refcount saturation warning.

Fix this by extending the scope of mmap_mutex to cover the
map_range() call. This ensures that the ring buffer initialization
and mapping (or cleanup on failure) happens effectively atomically,
preventing other threads from accessing a half-initialized or
dying ring buffer.

v2:
Because expanding the guarded region would cause event->mmap_mutex
to be acquired recursively in perf_mmap_close(), potentially
leading to a self-deadlock, the original logic of perf_mmap_close()
was retained, and the mutex-holding variant of the cleanup was
split out into a new perf_mmap_close_locked() function.

v3:
The fix is made smaller by passing a "holds_event_mmap_lock"
parameter to the common __perf_mmap_close() helper, so the locked
and unlocked entry points share one implementation.

Reported-by: kernel test robot <lkp@xxxxxxxxx>
Closes: https://lore.kernel.org/oe-kbuild-all/202602020208.m7KIjdzW-lkp@xxxxxxxxx/
Suggested-by: Ian Rogers <irogers@xxxxxxxxxx>
Signed-off-by: Haocheng Yu <yuhaocheng035@xxxxxxxxx>
---
kernel/events/core.c | 78 +++++++++++++++++++++++++++-----------------
1 file changed, 48 insertions(+), 30 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 2c35acc2722b..a3228c587de1 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6730,9 +6730,10 @@ static void perf_pmu_output_stop(struct perf_event *event);
* the buffer here, where we still have a VM context. This means we need
* to detach all events redirecting to us.
*/
-static void perf_mmap_close(struct vm_area_struct *vma)
+static void __perf_mmap_close(struct vm_area_struct *vma, struct perf_event *event,
+ bool holds_event_mmap_lock)
{
- struct perf_event *event = vma->vm_file->private_data;
+ struct perf_event *iter_event;
mapped_f unmapped = get_mapped(event, event_unmapped);
struct perf_buffer *rb = ring_buffer_get(event);
struct user_struct *mmap_user = rb->mmap_user;
@@ -6772,11 +6773,14 @@ static void perf_mmap_close(struct vm_area_struct *vma)
if (refcount_dec_and_test(&rb->mmap_count))
detach_rest = true;

- if (!refcount_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
+ if ((!holds_event_mmap_lock &&
+ !refcount_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) ||
+ (holds_event_mmap_lock && !refcount_dec_and_test(&event->mmap_count)))
goto out_put;

ring_buffer_attach(event, NULL);
- mutex_unlock(&event->mmap_mutex);
+ if (!holds_event_mmap_lock)
+ mutex_unlock(&event->mmap_mutex);

/* If there's still other mmap()s of this buffer, we're done. */
if (!detach_rest)
@@ -6789,8 +6793,8 @@ static void perf_mmap_close(struct vm_area_struct *vma)
*/
again:
rcu_read_lock();
- list_for_each_entry_rcu(event, &rb->event_list, rb_entry) {
- if (!atomic_long_inc_not_zero(&event->refcount)) {
+ list_for_each_entry_rcu(iter_event, &rb->event_list, rb_entry) {
+ if (!atomic_long_inc_not_zero(&iter_event->refcount)) {
/*
* This event is en-route to free_event() which will
* detach it and remove it from the list.
@@ -6799,7 +6803,8 @@ static void perf_mmap_close(struct vm_area_struct *vma)
}
rcu_read_unlock();

- mutex_lock(&event->mmap_mutex);
+ if (!holds_event_mmap_lock)
+ mutex_lock(&iter_event->mmap_mutex);
/*
* Check we didn't race with perf_event_set_output() which can
* swizzle the rb from under us while we were waiting to
@@ -6810,11 +6815,12 @@ static void perf_mmap_close(struct vm_area_struct *vma)
* still restart the iteration to make sure we're not now
* iterating the wrong list.
*/
- if (event->rb == rb)
- ring_buffer_attach(event, NULL);
+ if (iter_event->rb == rb)
+ ring_buffer_attach(iter_event, NULL);

- mutex_unlock(&event->mmap_mutex);
- put_event(event);
+ if (!holds_event_mmap_lock)
+ mutex_unlock(&iter_event->mmap_mutex);
+ put_event(iter_event);

/*
* Restart the iteration; either we're on the wrong list or
@@ -6842,6 +6848,18 @@ static void perf_mmap_close(struct vm_area_struct *vma)
ring_buffer_put(rb); /* could be last */
}

+static void perf_mmap_close(struct vm_area_struct *vma)
+{
+ struct perf_event *event = vma->vm_file->private_data;
+
+ __perf_mmap_close(vma, event, false);
+}
+
+static void perf_mmap_close_locked(struct vm_area_struct *vma, struct perf_event *event)
+{
+ __perf_mmap_close(vma, event, true);
+}
+
static vm_fault_t perf_mmap_pfn_mkwrite(struct vm_fault *vmf)
{
/* The first page is the user control page, others are read-only. */
@@ -7167,28 +7185,28 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
ret = perf_mmap_aux(vma, event, nr_pages);
if (ret)
return ret;
- }

- /*
- * Since pinned accounting is per vm we cannot allow fork() to copy our
- * vma.
- */
- vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP);
- vma->vm_ops = &perf_mmap_vmops;
+ /*
+ * Since pinned accounting is per vm we cannot allow fork() to copy our
+ * vma.
+ */
+ vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP);
+ vma->vm_ops = &perf_mmap_vmops;

- mapped = get_mapped(event, event_mapped);
- if (mapped)
- mapped(event, vma->vm_mm);
+ mapped = get_mapped(event, event_mapped);
+ if (mapped)
+ mapped(event, vma->vm_mm);

- /*
- * Try to map it into the page table. On fail, invoke
- * perf_mmap_close() to undo the above, as the callsite expects
- * full cleanup in this case and therefore does not invoke
- * vmops::close().
- */
- ret = map_range(event->rb, vma);
- if (ret)
- perf_mmap_close(vma);
+ /*
+ * Try to map it into the page table. On fail, invoke
+ * perf_mmap_close() to undo the above, as the callsite expects
+ * full cleanup in this case and therefore does not invoke
+ * vmops::close().
+ */
+ ret = map_range(event->rb, vma);
+ if (ret)
+ perf_mmap_close_locked(vma, event);
+ }

return ret;
}

base-commit: 7d0a66e4bb9081d75c82ec4957c50034cb0ea449
--
2.51.0