Re: [Patch v9 12/12] perf/x86/intel: Add counter group support for arch-PEBS

From: Mi, Dapeng

Date: Mon Mar 09 2026 - 22:11:58 EST



On 3/10/2026 6:59 AM, Ian Rogers wrote:
> On Wed, Oct 29, 2025 at 3:25 AM Dapeng Mi <dapeng1.mi@xxxxxxxxxxxxxxx> wrote:
>> Based on the previous adaptive PEBS counter snapshot support, add counter
>> group support for architectural PEBS. Since arch-PEBS shares the same
>> counter group layout with adaptive PEBS, directly reuse the
>> __setup_pebs_counter_group() helper to process arch-PEBS counter groups.
>>
>> Signed-off-by: Dapeng Mi <dapeng1.mi@xxxxxxxxxxxxxxx>
>> ---
>> arch/x86/events/intel/core.c | 38 ++++++++++++++++++++++++++++---
>> arch/x86/events/intel/ds.c | 29 ++++++++++++++++++++---
>> arch/x86/include/asm/msr-index.h | 6 +++++
>> arch/x86/include/asm/perf_event.h | 13 ++++++++---
>> 4 files changed, 77 insertions(+), 9 deletions(-)
>>
>> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
>> index 75cba28b86d5..cb64018321dd 100644
>> --- a/arch/x86/events/intel/core.c
>> +++ b/arch/x86/events/intel/core.c
>> @@ -3014,6 +3014,17 @@ static void intel_pmu_enable_event_ext(struct perf_event *event)
>>
>> if (pebs_data_cfg & PEBS_DATACFG_LBRS)
>> ext |= ARCH_PEBS_LBR & cap.caps;
>> +
>> + if (pebs_data_cfg &
>> + (PEBS_DATACFG_CNTR_MASK << PEBS_DATACFG_CNTR_SHIFT))
>> + ext |= ARCH_PEBS_CNTR_GP & cap.caps;
>> +
>> + if (pebs_data_cfg &
>> + (PEBS_DATACFG_FIX_MASK << PEBS_DATACFG_FIX_SHIFT))
>> + ext |= ARCH_PEBS_CNTR_FIXED & cap.caps;
>> +
>> + if (pebs_data_cfg & PEBS_DATACFG_METRICS)
>> + ext |= ARCH_PEBS_CNTR_METRICS & cap.caps;
>> }
>>
>> if (cpuc->n_pebs == cpuc->n_large_pebs)
>> @@ -3038,6 +3049,9 @@ static void intel_pmu_enable_event_ext(struct perf_event *event)
>> }
>> }
>>
>> + if (is_pebs_counter_event_group(event))
>> + ext |= ARCH_PEBS_CNTR_ALLOW;
>> +
>> if (cpuc->cfg_c_val[hwc->idx] != ext)
>> __intel_pmu_update_event_ext(hwc->idx, ext);
>> }
>> @@ -4323,6 +4337,20 @@ static bool intel_pmu_is_acr_group(struct perf_event *event)
>> return false;
>> }
>>
>> +static inline bool intel_pmu_has_pebs_counter_group(struct pmu *pmu)
>> +{
>> + u64 caps;
>> +
>> + if (x86_pmu.intel_cap.pebs_format >= 6 && x86_pmu.intel_cap.pebs_baseline)
>> + return true;
>> +
>> + caps = hybrid(pmu, arch_pebs_cap).caps;
>> + if (x86_pmu.arch_pebs && (caps & ARCH_PEBS_CNTR_MASK))
>> + return true;
>> +
>> + return false;
>> +}
>> +
>> static inline void intel_pmu_set_acr_cntr_constr(struct perf_event *event,
>> u64 *cause_mask, int *num)
>> {
>> @@ -4471,8 +4499,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
>> }
>>
>> if ((event->attr.sample_type & PERF_SAMPLE_READ) &&
>> - (x86_pmu.intel_cap.pebs_format >= 6) &&
>> - x86_pmu.intel_cap.pebs_baseline &&
>> + intel_pmu_has_pebs_counter_group(event->pmu) &&
>> is_sampling_event(event) &&
>> event->attr.precise_ip)
>> event->group_leader->hw.flags |= PERF_X86_EVENT_PEBS_CNTR;
>> @@ -5420,6 +5447,8 @@ static inline void __intel_update_large_pebs_flags(struct pmu *pmu)
>> x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
>> if (caps & ARCH_PEBS_LBR)
>> x86_pmu.large_pebs_flags |= PERF_SAMPLE_BRANCH_STACK;
>> + if (caps & ARCH_PEBS_CNTR_MASK)
>> + x86_pmu.large_pebs_flags |= PERF_SAMPLE_READ;
>>
>> if (!(caps & ARCH_PEBS_AUX))
>> x86_pmu.large_pebs_flags &= ~PERF_SAMPLE_DATA_SRC;
>> @@ -7134,8 +7163,11 @@ __init int intel_pmu_init(void)
>> * Many features on and after V6 require dynamic constraint,
>> * e.g., Arch PEBS, ACR.
>> */
>> - if (version >= 6)
>> + if (version >= 6) {
>> x86_pmu.flags |= PMU_FL_DYN_CONSTRAINT;
>> + x86_pmu.late_setup = intel_pmu_late_setup;
>> + }
>> +
>> /*
>> * Install the hw-cache-events table:
>> */
>> diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
>> index c66e9b562de3..c93bf971d97b 100644
>> --- a/arch/x86/events/intel/ds.c
>> +++ b/arch/x86/events/intel/ds.c
>> @@ -1530,13 +1530,20 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
>>
>> u64 intel_get_arch_pebs_data_config(struct perf_event *event)
>> {
>> + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
>> u64 pebs_data_cfg = 0;
>> + u64 cntr_mask;
>>
>> if (WARN_ON(event->hw.idx < 0 || event->hw.idx >= X86_PMC_IDX_MAX))
>> return 0;
>>
>> pebs_data_cfg |= pebs_update_adaptive_cfg(event);
>>
>> + cntr_mask = (PEBS_DATACFG_CNTR_MASK << PEBS_DATACFG_CNTR_SHIFT) |
>> + (PEBS_DATACFG_FIX_MASK << PEBS_DATACFG_FIX_SHIFT) |
>> + PEBS_DATACFG_CNTR | PEBS_DATACFG_METRICS;
>> + pebs_data_cfg |= cpuc->pebs_data_cfg & cntr_mask;
>> +
>> return pebs_data_cfg;
>> }
>>
>> @@ -2444,6 +2451,24 @@ static void setup_arch_pebs_sample_data(struct perf_event *event,
>> }
>> }
>>
>> + if (header->cntr) {
>> + struct arch_pebs_cntr_header *cntr = next_record;
>> + unsigned int nr;
>> +
>> + next_record += sizeof(struct arch_pebs_cntr_header);
>> +
>> + if (is_pebs_counter_event_group(event)) {
>> + __setup_pebs_counter_group(cpuc, event,
>> + (struct pebs_cntr_header *)cntr, next_record);
>> + data->sample_flags |= PERF_SAMPLE_READ;
>> + }
>> +
>> + nr = hweight32(cntr->cntr) + hweight32(cntr->fixed);
>> + if (cntr->metrics == INTEL_CNTR_METRICS)
>> + nr += 2;
>> + next_record += nr * sizeof(u64);
>> + }
>> +
>> /* Parse followed fragments if there are. */
>> if (arch_pebs_record_continued(header)) {
>> at = at + header->size;
>> @@ -3094,10 +3119,8 @@ static void __init intel_ds_pebs_init(void)
>> break;
>>
>> case 6:
>> - if (x86_pmu.intel_cap.pebs_baseline) {
>> + if (x86_pmu.intel_cap.pebs_baseline)
>> x86_pmu.large_pebs_flags |= PERF_SAMPLE_READ;
>> - x86_pmu.late_setup = intel_pmu_late_setup;
>> - }
> Hi Dapeng,
>
> I'm trying to understand why the late_setup initialization was changed
> here and its connection with counter group support. I couldn't find a
> mention in the commit message.

It's because arch-PEBS also supports counter group sampling, not just
legacy PEBS with PEBS format 6. Both ACR (auto counter reload) and PEBS
counter group sampling need late_setup, and both features (regardless of
legacy PEBS or arch-PEBS) were introduced with Perfmon v6, so the
late_setup initialization is moved to the common Perfmon v6 initialization
path. Thanks.
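
Concretely, the consolidation looks like the sketch below, based on the
core.c hunk above (only the relevant lines are shown, with comments
summarizing the rationale):

    /* arch/x86/events/intel/core.c: intel_pmu_init(), sketch of the hunk above */
    /*
     * Many features on and after V6 require dynamic constraint,
     * e.g., Arch PEBS, ACR.
     */
    if (version >= 6) {
            x86_pmu.flags |= PMU_FL_DYN_CONSTRAINT;
            /*
             * ACR and counter group sampling (legacy PEBS and arch-PEBS)
             * both rely on the late setup hook, and both exist only from
             * Perfmon v6 on, so install it here instead of in the
             * pebs_format == 6 && pebs_baseline branch of
             * intel_ds_pebs_init().
             */
            x86_pmu.late_setup = intel_pmu_late_setup;
    }

The hook itself is unchanged by this patch; only the place where it is
installed moves, as the ds.c hunk above removes the old assignment.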


>
> Thanks,
> Ian
>
>> fallthrough;
>> case 5:
>> x86_pmu.pebs_ept = 1;
>> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
>> index f1ef9ac38bfb..65cc528fbad8 100644
>> --- a/arch/x86/include/asm/msr-index.h
>> +++ b/arch/x86/include/asm/msr-index.h
>> @@ -334,12 +334,18 @@
>> #define ARCH_PEBS_INDEX_WR_SHIFT 4
>>
>> #define ARCH_PEBS_RELOAD 0xffffffff
>> +#define ARCH_PEBS_CNTR_ALLOW BIT_ULL(35)
>> +#define ARCH_PEBS_CNTR_GP BIT_ULL(36)
>> +#define ARCH_PEBS_CNTR_FIXED BIT_ULL(37)
>> +#define ARCH_PEBS_CNTR_METRICS BIT_ULL(38)
>> #define ARCH_PEBS_LBR_SHIFT 40
>> #define ARCH_PEBS_LBR (0x3ull << ARCH_PEBS_LBR_SHIFT)
>> #define ARCH_PEBS_VECR_XMM BIT_ULL(49)
>> #define ARCH_PEBS_GPR BIT_ULL(61)
>> #define ARCH_PEBS_AUX BIT_ULL(62)
>> #define ARCH_PEBS_EN BIT_ULL(63)
>> +#define ARCH_PEBS_CNTR_MASK (ARCH_PEBS_CNTR_GP | ARCH_PEBS_CNTR_FIXED | \
>> + ARCH_PEBS_CNTR_METRICS)
>>
>> #define MSR_IA32_RTIT_CTL 0x00000570
>> #define RTIT_CTL_TRACEEN BIT(0)
>> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
>> index 3b3848f0d339..7276ba70c88a 100644
>> --- a/arch/x86/include/asm/perf_event.h
>> +++ b/arch/x86/include/asm/perf_event.h
>> @@ -141,16 +141,16 @@
>> #define ARCH_PERFMON_EVENTS_COUNT 7
>>
>> #define PEBS_DATACFG_MEMINFO BIT_ULL(0)
>> -#define PEBS_DATACFG_GP BIT_ULL(1)
>> +#define PEBS_DATACFG_GP BIT_ULL(1)
>> #define PEBS_DATACFG_XMMS BIT_ULL(2)
>> #define PEBS_DATACFG_LBRS BIT_ULL(3)
>> -#define PEBS_DATACFG_LBR_SHIFT 24
>> #define PEBS_DATACFG_CNTR BIT_ULL(4)
>> +#define PEBS_DATACFG_METRICS BIT_ULL(5)
>> +#define PEBS_DATACFG_LBR_SHIFT 24
>> #define PEBS_DATACFG_CNTR_SHIFT 32
>> #define PEBS_DATACFG_CNTR_MASK GENMASK_ULL(15, 0)
>> #define PEBS_DATACFG_FIX_SHIFT 48
>> #define PEBS_DATACFG_FIX_MASK GENMASK_ULL(7, 0)
>> -#define PEBS_DATACFG_METRICS BIT_ULL(5)
>>
>> /* Steal the highest bit of pebs_data_cfg for SW usage */
>> #define PEBS_UPDATE_DS_SW BIT_ULL(63)
>> @@ -603,6 +603,13 @@ struct arch_pebs_lbr_header {
>> u64 ler_info;
>> };
>>
>> +struct arch_pebs_cntr_header {
>> + u32 cntr;
>> + u32 fixed;
>> + u32 metrics;
>> + u32 reserved;
>> +};
>> +
>> /*
>> * AMD Extended Performance Monitoring and Debug cpuid feature detection
>> */
>> --
>> 2.34.1
>>