[PATCH v2 07/24] perf: Fix perf_pmu_register() vs perf_init_event()

From: Peter Zijlstra
Date: Wed Feb 05 2025 - 05:29:46 EST


There is a fairly obvious race between perf_init_event() doing
idr_find() and perf_pmu_register() doing idr_alloc() with an
incompletely initialized pmu pointer.

Avoid by doing idr_alloc() on a NULL pointer to register the id, and
swizzling the real pmu pointer at the end using idr_replace().

Also making sure to not set pmu members after publishing the pmu, duh.

[ introduce idr_cmpxchg() in order to better handle the idr_replace()
error case -- if it were to return an unexpected pointer, it will
already have replaced the value and there is no going back. ]

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
kernel/events/core.c | 28 ++++++++++++++++++++++++++--
1 file changed, 26 insertions(+), 2 deletions(-)

--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -11739,6 +11739,21 @@ static int pmu_dev_alloc(struct pmu *pmu
static struct lock_class_key cpuctx_mutex;
static struct lock_class_key cpuctx_lock;

+static bool idr_cmpxchg(struct idr *idr, unsigned long id, void *old, void *new)
+{
+ void *tmp, *val = idr_find(idr, id);
+
+ if (val != old)
+ return false;
+
+ tmp = idr_replace(idr, new, id);
+ if (IS_ERR(tmp))
+ return false;
+
+ WARN_ON_ONCE(tmp != val);
+ return true;
+}
+
int perf_pmu_register(struct pmu *pmu, const char *name, int type)
{
int cpu, ret, max = PERF_TYPE_MAX;
@@ -11765,7 +11780,7 @@ int perf_pmu_register(struct pmu *pmu, c
if (type >= 0)
max = type;

- ret = idr_alloc(&pmu_idr, pmu, max, 0, GFP_KERNEL);
+ ret = idr_alloc(&pmu_idr, NULL, max, 0, GFP_KERNEL);
if (ret < 0)
goto free_pdc;

@@ -11773,6 +11788,7 @@ int perf_pmu_register(struct pmu *pmu, c

type = ret;
pmu->type = type;
+ atomic_set(&pmu->exclusive_cnt, 0);

if (pmu_bus_running && !pmu->dev) {
ret = pmu_dev_alloc(pmu);
@@ -11821,14 +11837,22 @@ int perf_pmu_register(struct pmu *pmu, c
if (!pmu->event_idx)
pmu->event_idx = perf_event_idx_default;

+ /*
+ * Now that the PMU is complete, make it visible to perf_try_init_event().
+ */
+ if (!idr_cmpxchg(&pmu_idr, pmu->type, NULL, pmu))
+ goto free_context;
list_add_rcu(&pmu->entry, &pmus);
- atomic_set(&pmu->exclusive_cnt, 0);
+
ret = 0;
unlock:
mutex_unlock(&pmus_lock);

return ret;

+free_context:
+ free_percpu(pmu->cpu_pmu_context);
+
free_dev:
if (pmu->dev && pmu->dev != PMU_NULL_DEV) {
device_del(pmu->dev);