Re: [PATCH 6/7] perf/x86/intel/uncore: Fix uncore_box ref/unref ordering on CPU hotplug
From: Chen, Zide
Date: Wed May 13 2026 - 14:58:42 EST
On 5/13/2026 1:59 AM, Mi, Dapeng wrote:
>
> On 5/13/2026 7:30 AM, Zide Chen wrote:
>> In uncore_event_cpu_online(), uncore_box_ref() was called before
>> uncore_change_context(). uncore_box_ref() gates on box->cpu >= 0,
>> but box->cpu is still -1 at that point because uncore_change_context()
>> has not run yet. As a result, the box is never initialized on the
>> first CPU to come online in a die, leaving it permanently
>> uninitialized in the single-CPU-per-die case.
>>
>> Thus, cpu_refcnt is one count below the true value, and in the CPU
>> offline path, the box will be torn down on the second-to-last CPU.
>>
>> In uncore_event_cpu_offline(), uncore_box_unref() was called after
>> uncore_change_context(), so box->cpu is already -1 when the collector
>> CPU goes offline, which prevents it from tearing down the box.
>>
>> Fix by swapping the call order in both paths so that
>> uncore_box_{ref,unref}() runs at the point where box->cpu reflects
>> the correct context.
>>
>> Fixes: c74443d92f68 ("perf/x86/uncore: Support per PMU cpumask")
>> Signed-off-by: Zide Chen <zide.chen@xxxxxxxxx>
>> ---
>> arch/x86/events/intel/uncore.c | 56 ++++++++++++++++++----------------
>> 1 file changed, 29 insertions(+), 27 deletions(-)
>>
>> diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
>> index 922ba299533e..399f434e1a7d 100644
>> --- a/arch/x86/events/intel/uncore.c
>> +++ b/arch/x86/events/intel/uncore.c
>> @@ -1574,9 +1574,15 @@ static int uncore_event_cpu_offline(unsigned int cpu)
>> {
>> int die, target;
>>
>> + /* Clear the references */
>> + die = topology_logical_die_id(cpu);
>> + uncore_box_unref(uncore_msr_uncores, die);
>> + uncore_box_unref(uncore_mmio_uncores, die);
>> +
>> /* Check if exiting cpu is used for collecting uncore events */
>> if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
>> - goto unref;
>> + return 0;
>> +
>> /* Find a new cpu to collect uncore events */
>> target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
>>
>> @@ -1589,20 +1595,14 @@ static int uncore_event_cpu_offline(unsigned int cpu)
>> uncore_change_context(uncore_msr_uncores, cpu, target);
>> uncore_change_context(uncore_mmio_uncores, cpu, target);
>> uncore_change_context(uncore_pci_uncores, cpu, target);
>> -
>> -unref:
>> - /* Clear the references */
>> - die = topology_logical_die_id(cpu);
>> - uncore_box_unref(uncore_msr_uncores, die);
>> - uncore_box_unref(uncore_mmio_uncores, die);
>> return 0;
>> }
>>
>> -static int allocate_boxes(struct intel_uncore_type **types,
>> +static void allocate_boxes(struct intel_uncore_type **types,
>> unsigned int die, unsigned int cpu)
>> {
>> struct intel_uncore_box *box, *tmp;
>> - struct intel_uncore_type *type;
>> + struct intel_uncore_type *type, **start = types;
>> struct intel_uncore_pmu *pmu;
>> LIST_HEAD(allocated);
>> int i;
>> @@ -1627,14 +1627,21 @@ static int allocate_boxes(struct intel_uncore_type **types,
>> list_del_init(&box->active_list);
>> box->pmu->boxes[die] = box;
>> }
>> - return 0;
>> + return;
>>
>> cleanup:
>> list_for_each_entry_safe(box, tmp, &allocated, active_list) {
>> list_del_init(&box->active_list);
>> kfree(box);
>> }
>> - return -ENOMEM;
>> +
>> + /* mark the PMU broken to prevent future usage. */
>> + for (; *start; start++) {
>> + type = *start;
>> + pmu = type->pmus;
>> + for (i = 0; i < type->num_boxes; i++, pmu++)
>> + uncore_pmu_set_broken(pmu);
>> + }
>
> It looks like all PMUs of all types are marked as broken, even if the box
> allocation for some of those PMUs didn't fail. Could we mark only the
> failed and later PMUs as broken?
Right, it should simply set one PMU broken:
- if (!box)
+ if (!box) {
+ uncore_pmu_set_broken(pmu);
goto cleanup;
+ }
>> }
>>
>> static int uncore_box_ref(struct intel_uncore_type **types,
>> @@ -1643,11 +1650,7 @@ static int uncore_box_ref(struct intel_uncore_type **types,
>> struct intel_uncore_type *type;
>> struct intel_uncore_pmu *pmu;
>> struct intel_uncore_box *box;
>> - int i, ret;
>> -
>> - ret = allocate_boxes(types, die, cpu);
>> - if (ret)
>> - return ret;
>> + int i;
>>
>> for (; *types; types++) {
>> type = *types;
>> @@ -1664,27 +1667,26 @@ static int uncore_box_ref(struct intel_uncore_type **types,
>>
>> static int uncore_event_cpu_online(unsigned int cpu)
>> {
>> - int die, target, msr_ret, mmio_ret;
>> + int die, target;
>>
>> die = topology_logical_die_id(cpu);
>> - msr_ret = uncore_box_ref(uncore_msr_uncores, die, cpu);
>> - mmio_ret = uncore_box_ref(uncore_mmio_uncores, die, cpu);
>> + allocate_boxes(uncore_msr_uncores, die, cpu);
>> + allocate_boxes(uncore_mmio_uncores, die, cpu);
>>
>> /*
>> * Check if there is an online cpu in the package
>> * which collects uncore events already.
>> */
>> target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu));
>> - if (target < nr_cpu_ids)
>> - return 0;
>> -
>> - cpumask_set_cpu(cpu, &uncore_cpu_mask);
>> -
>> - if (!msr_ret)
>> + if (target >= nr_cpu_ids) {
>> + cpumask_set_cpu(cpu, &uncore_cpu_mask);
>> uncore_change_context(uncore_msr_uncores, -1, cpu);
>> - if (!mmio_ret)
>> uncore_change_context(uncore_mmio_uncores, -1, cpu);
>> - uncore_change_context(uncore_pci_uncores, -1, cpu);
>> + uncore_change_context(uncore_pci_uncores, -1, cpu);
>> + }
>> +
>> + uncore_box_ref(uncore_msr_uncores, die, cpu);
>> + uncore_box_ref(uncore_mmio_uncores, die, cpu);
>> return 0;
>> }
>>