Re: [PATCH 5/7] perf/x86/intel/uncore: Introduce PMU flags and broken state
From: Ian Rogers
Date: Tue May 12 2026 - 20:29:02 EST
On Tue, May 12, 2026 at 4:39 PM Zide Chen <zide.chen@xxxxxxxxx> wrote:
>
> Replace the boolean 'registered' field in intel_uncore_pmu with an
> unsigned long 'flags' field, and add a PMU_BROKEN flag to track box
> setup failures.
>
> When any box fails to initialize, the PMU is marked broken. Broken
> PMUs reject new event assignments and skip future box setup attempts.
> If the PMU was already registered, it remains so to avoid disrupting
> in-flight events on other boxes.
>
> To prevent retry loops, die_refcnt and cpu_refcnt are not decremented
> on failure, and broken PMUs are skipped in the CPU hotplug and box
> allocation paths.
>
> Signed-off-by: Zide Chen <zide.chen@xxxxxxxxx>
Reviewed-by: Ian Rogers <irogers@xxxxxxxxxx>
Thanks,
Ian
> ---
> arch/x86/events/intel/uncore.c | 36 +++++++++++++++++++++++-------
> arch/x86/events/intel/uncore.h | 12 +++++++++-
> arch/x86/events/intel/uncore_snb.c | 2 +-
> 3 files changed, 40 insertions(+), 10 deletions(-)
>
> diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
> index 00ed4e5047ac..922ba299533e 100644
> --- a/arch/x86/events/intel/uncore.c
> +++ b/arch/x86/events/intel/uncore.c
> @@ -757,7 +757,7 @@ static int uncore_pmu_event_init(struct perf_event *event)
>
> pmu = uncore_event_to_pmu(event);
> /* no device found for this pmu */
> - if (!pmu->registered)
> + if (!uncore_pmu_available(pmu))
> return -ENOENT;
>
> /* Sampling not supported yet */
> @@ -953,16 +953,16 @@ static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
>
> ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
> if (!ret)
> - pmu->registered = true;
> + uncore_pmu_set_registered(pmu);
> return ret;
> }
>
> static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
> {
> - if (!pmu->registered)
> + if (!uncore_pmu_registered(pmu))
> return;
> perf_pmu_unregister(&pmu->pmu);
> - pmu->registered = false;
> + WRITE_ONCE(pmu->flags, 0);
> }
>
> static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
> @@ -1155,7 +1155,13 @@ static int uncore_box_setup(struct intel_uncore_pmu *pmu,
>
> /* die_refcnt tracks online dies, not only functioning boxes. */
> dies = atomic_inc_return(&pmu->die_refcnt);
> - uncore_box_init(box);
> +
> + if (uncore_pmu_broken(pmu))
> + return -ENODEV;
> +
> + ret = uncore_box_init(box);
> + if (ret)
> + goto err;
>
> /* First active box registers the pmu. */
> if (dies > 1)
> @@ -1167,6 +1173,19 @@ static int uncore_box_setup(struct intel_uncore_pmu *pmu,
>
> return 0;
> err:
> + /*
> + * On failure on any box, mark the per-package PMU as broken regardless
> + * of whether it was registered or not.
> + *
> + * Don't decrement die_refcnt to prevent any future CPU online
> + * event or PCI probe, from retrying the failed PMU registration.
> + *
> + * Don't decrement cpu_refcnt to avoid other in-die CPUs from
> + * trying to set up the PMU box again.
> + *
> + * Don't kfree box; MSR and MMIO boxes are freed at module exit only.
> + */
> + uncore_pmu_set_broken(pmu);
> uncore_box_exit(box);
> return ret;
> }
> @@ -1502,7 +1521,8 @@ static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
>
> if (old_cpu < 0) {
> WARN_ON_ONCE(box->cpu != -1);
> - if (uncore_die_has_box(type, die, pmu->pmu_idx)) {
> + if (uncore_die_has_box(type, die, pmu->pmu_idx) &&
> + !uncore_pmu_broken(pmu)) {
> box->cpu = new_cpu;
> cpumask_set_cpu(new_cpu, &pmu->cpu_mask);
> }
> @@ -1512,7 +1532,7 @@ static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
> WARN_ON_ONCE(box->cpu != -1 && box->cpu != old_cpu);
> box->cpu = -1;
> cpumask_clear_cpu(old_cpu, &pmu->cpu_mask);
> - if (new_cpu < 0)
> + if (new_cpu < 0 || uncore_pmu_broken(pmu))
> continue;
>
> if (!uncore_die_has_box(type, die, pmu->pmu_idx))
> @@ -1592,7 +1612,7 @@ static int allocate_boxes(struct intel_uncore_type **types,
> type = *types;
> pmu = type->pmus;
> for (i = 0; i < type->num_boxes; i++, pmu++) {
> - if (pmu->boxes[die])
> + if (pmu->boxes[die] || uncore_pmu_broken(pmu))
> continue;
> box = uncore_alloc_box(type, cpu_to_node(cpu));
> if (!box)
> diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
> index 5ee05545116a..4d3a99bf1455 100644
> --- a/arch/x86/events/intel/uncore.h
> +++ b/arch/x86/events/intel/uncore.h
> @@ -146,13 +146,23 @@ struct intel_uncore_pmu {
> struct pmu pmu;
> char name[UNCORE_PMU_NAME_LEN];
> int pmu_idx;
> - bool registered;
> + unsigned long flags;
> atomic_t die_refcnt;
> cpumask_t cpu_mask;
> struct intel_uncore_type *type;
> struct intel_uncore_box **boxes;
> };
>
> +#define PMU_REGISTERED_BIT 0
> +#define PMU_BROKEN_BIT 1
> +
> +#define uncore_pmu_registered(pmu) test_bit(PMU_REGISTERED_BIT, &(pmu)->flags)
> +#define uncore_pmu_broken(pmu) test_bit(PMU_BROKEN_BIT, &(pmu)->flags)
> +#define uncore_pmu_available(pmu) (uncore_pmu_registered(pmu) && \
> + !uncore_pmu_broken(pmu))
> +#define uncore_pmu_set_registered(pmu) set_bit(PMU_REGISTERED_BIT, &(pmu)->flags)
> +#define uncore_pmu_set_broken(pmu) set_bit(PMU_BROKEN_BIT, &(pmu)->flags)
> +
> struct intel_uncore_extra_reg {
> raw_spinlock_t lock;
> u64 config, config1, config2;
> diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
> index c5347920541c..055131c508ff 100644
> --- a/arch/x86/events/intel/uncore_snb.c
> +++ b/arch/x86/events/intel/uncore_snb.c
> @@ -940,7 +940,7 @@ static int snb_uncore_imc_event_init(struct perf_event *event)
>
> pmu = uncore_event_to_pmu(event);
> /* no device found for this pmu */
> - if (!pmu->registered)
> + if (!uncore_pmu_available(pmu))
> return -ENOENT;
>
> /* Sampling not supported yet */
> --
> 2.54.0
>