Re: [Patch v9 11/12] perf/x86/intel: Setup PEBS data configuration and enable legacy groups
From: Mi, Dapeng
Date: Thu Mar 05 2026 - 21:17:44 EST
On 3/5/2026 9:20 AM, Ian Rogers wrote:
> On Wed, Oct 29, 2025 at 3:25 AM Dapeng Mi <dapeng1.mi@xxxxxxxxxxxxxxx> wrote:
>> Different from legacy PEBS, arch-PEBS provides per-counter PEBS data
>> configuration by programming the IA32_PMC_GPx/FXx_CFG_C MSRs.
>>
>> This patch obtains the PEBS data configuration from the event attributes,
>> writes it to the corresponding IA32_PMC_GPx/FXx_CFG_C MSR and enables the
>> corresponding PEBS groups.
>>
>> Please note this patch only enables XMM SIMD regs sampling for
>> arch-PEBS; sampling of the other SIMD regs (OPMASK/YMM/ZMM) on arch-PEBS
>> will be supported after PMI based SIMD regs (OPMASK/YMM/ZMM) sampling
>> is supported.
>>
>> Co-developed-by: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>
>> Signed-off-by: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>
>> Signed-off-by: Dapeng Mi <dapeng1.mi@xxxxxxxxxxxxxxx>
>> ---
>> arch/x86/events/intel/core.c | 136 ++++++++++++++++++++++++++++++-
>> arch/x86/events/intel/ds.c | 17 ++++
>> arch/x86/events/perf_event.h | 4 +
>> arch/x86/include/asm/intel_ds.h | 7 ++
>> arch/x86/include/asm/msr-index.h | 8 ++
>> 5 files changed, 171 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
>> index 40ccfd80d554..75cba28b86d5 100644
>> --- a/arch/x86/events/intel/core.c
>> +++ b/arch/x86/events/intel/core.c
>> @@ -2563,6 +2563,45 @@ static void intel_pmu_disable_fixed(struct perf_event *event)
>> cpuc->fixed_ctrl_val &= ~mask;
>> }
>>
>> +static inline void __intel_pmu_update_event_ext(int idx, u64 ext)
>> +{
>> + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
>> + u32 msr;
>> +
>> + if (idx < INTEL_PMC_IDX_FIXED) {
>> + msr = MSR_IA32_PMC_V6_GP0_CFG_C +
>> + x86_pmu.addr_offset(idx, false);
>> + } else {
>> + msr = MSR_IA32_PMC_V6_FX0_CFG_C +
>> + x86_pmu.addr_offset(idx - INTEL_PMC_IDX_FIXED, false);
>> + }
>> +
>> + cpuc->cfg_c_val[idx] = ext;
>> + wrmsrq(msr, ext);
>> +}
>> +
>> +static void intel_pmu_disable_event_ext(struct perf_event *event)
>> +{
>> + if (!x86_pmu.arch_pebs)
>> + return;
>> +
>> + /*
>> + * Only clear CFG_C MSR for PEBS counter group events,
>> + * it avoids the HW counter's value to be added into
>> + * other PEBS records incorrectly after PEBS counter
>> + * group events are disabled.
>> + *
>> + * For other events, it's unnecessary to clear CFG_C MSRs
>> + * since CFG_C doesn't take effect if counter is in
>> + * disabled state. That helps to reduce the WRMSR overhead
>> + * in context switches.
>> + */
>> + if (!is_pebs_counter_event_group(event))
>> + return;
>> +
>> + __intel_pmu_update_event_ext(event->hw.idx, 0);
>> +}
>> +
>> static void intel_pmu_disable_event(struct perf_event *event)
>> {
>> struct hw_perf_event *hwc = &event->hw;
>> @@ -2571,9 +2610,12 @@ static void intel_pmu_disable_event(struct perf_event *event)
>> switch (idx) {
>> case 0 ... INTEL_PMC_IDX_FIXED - 1:
>> intel_clear_masks(event, idx);
>> + intel_pmu_disable_event_ext(event);
>> x86_pmu_disable_event(event);
>> break;
>> case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
>> + intel_pmu_disable_event_ext(event);
>> + fallthrough;
>> case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
>> intel_pmu_disable_fixed(event);
>> break;
>> @@ -2940,6 +2982,66 @@ static void intel_pmu_enable_acr(struct perf_event *event)
>>
>> DEFINE_STATIC_CALL_NULL(intel_pmu_enable_acr_event, intel_pmu_enable_acr);
>>
>> +static void intel_pmu_enable_event_ext(struct perf_event *event)
>> +{
>> + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
>> + struct hw_perf_event *hwc = &event->hw;
>> + union arch_pebs_index old, new;
>> + struct arch_pebs_cap cap;
>> + u64 ext = 0;
>> +
>> + if (!x86_pmu.arch_pebs)
>> + return;
>> +
>> + cap = hybrid(cpuc->pmu, arch_pebs_cap);
>> +
>> + if (event->attr.precise_ip) {
>> + u64 pebs_data_cfg = intel_get_arch_pebs_data_config(event);
>> +
>> + ext |= ARCH_PEBS_EN;
>> + if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD)
>> + ext |= (-hwc->sample_period) & ARCH_PEBS_RELOAD;
> Nit: Perhaps there should be a warning if "hwc->sample_period >
> ARCH_PEBS_RELOAD"?
Hmm, strictly speaking we should not warn when hwc->sample_period is larger
than ARCH_PEBS_RELOAD, since that is allowed as long as hwc->sample_period
does not exceed x86_pmu.max_period (which is "x86_pmu.cntval_mask >> 1" on
modern Intel platforms). But we do need a sample_period check for arch-PEBS,
just like the ACR check below.
```
/* The reload value cannot exceeds the max period */
if (event->attr.sample_period > x86_pmu.max_period)
return -EINVAL;
```
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/x86/events/intel/core.c?h=v7.0-rc2#n4789
I will add a patch to do this check. Thanks.
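For arch-PEBS I am thinking of something along these lines (a rough sketch
only; placing it in intel_pmu_hw_config() is my assumption, the exact hook is
still to be decided):
```
	/* Sketch: mirror the ACR check for arch-PEBS precise events. */
	if (event->attr.precise_ip && x86_pmu.arch_pebs) {
		/* The sample period cannot exceed the max period */
		if (event->attr.sample_period > x86_pmu.max_period)
			return -EINVAL;
	}
```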
>
> Thanks,
> Ian
>
>> +
>> + if (pebs_data_cfg && cap.caps) {
>> + if (pebs_data_cfg & PEBS_DATACFG_MEMINFO)
>> + ext |= ARCH_PEBS_AUX & cap.caps;
>> +
>> + if (pebs_data_cfg & PEBS_DATACFG_GP)
>> + ext |= ARCH_PEBS_GPR & cap.caps;
>> +
>> + if (pebs_data_cfg & PEBS_DATACFG_XMMS)
>> + ext |= ARCH_PEBS_VECR_XMM & cap.caps;
>> +
>> + if (pebs_data_cfg & PEBS_DATACFG_LBRS)
>> + ext |= ARCH_PEBS_LBR & cap.caps;
>> + }
>> +
>> + if (cpuc->n_pebs == cpuc->n_large_pebs)
>> + new.thresh = ARCH_PEBS_THRESH_MULTI;
>> + else
>> + new.thresh = ARCH_PEBS_THRESH_SINGLE;
>> +
>> + rdmsrq(MSR_IA32_PEBS_INDEX, old.whole);
>> + if (new.thresh != old.thresh || !old.en) {
>> + if (old.thresh == ARCH_PEBS_THRESH_MULTI && old.wr > 0) {
>> + /*
>> + * Large PEBS was enabled.
>> + * Drain PEBS buffer before applying the single PEBS.
>> + */
>> + intel_pmu_drain_pebs_buffer();
>> + } else {
>> + new.wr = 0;
>> + new.full = 0;
>> + new.en = 1;
>> + wrmsrq(MSR_IA32_PEBS_INDEX, new.whole);
>> + }
>> + }
>> + }
>> +
>> + if (cpuc->cfg_c_val[hwc->idx] != ext)
>> + __intel_pmu_update_event_ext(hwc->idx, ext);
>> +}
>> +
>> static void intel_pmu_enable_event(struct perf_event *event)
>> {
>> u64 enable_mask = ARCH_PERFMON_EVENTSEL_ENABLE;
>> @@ -2955,10 +3057,12 @@ static void intel_pmu_enable_event(struct perf_event *event)
>> enable_mask |= ARCH_PERFMON_EVENTSEL_BR_CNTR;
>> intel_set_masks(event, idx);
>> static_call_cond(intel_pmu_enable_acr_event)(event);
>> + intel_pmu_enable_event_ext(event);
>> __x86_pmu_enable_event(hwc, enable_mask);
>> break;
>> case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
>> static_call_cond(intel_pmu_enable_acr_event)(event);
>> + intel_pmu_enable_event_ext(event);
>> fallthrough;
>> case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
>> intel_pmu_enable_fixed(event);
>> @@ -5301,6 +5405,30 @@ static inline bool intel_pmu_broken_perf_cap(void)
>> return false;
>> }
>>
>> +static inline void __intel_update_pmu_caps(struct pmu *pmu)
>> +{
>> + struct pmu *dest_pmu = pmu ? pmu : x86_get_pmu(smp_processor_id());
>> +
>> + if (hybrid(pmu, arch_pebs_cap).caps & ARCH_PEBS_VECR_XMM)
>> + dest_pmu->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
>> +}
>> +
>> +static inline void __intel_update_large_pebs_flags(struct pmu *pmu)
>> +{
>> + u64 caps = hybrid(pmu, arch_pebs_cap).caps;
>> +
>> + x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
>> + if (caps & ARCH_PEBS_LBR)
>> + x86_pmu.large_pebs_flags |= PERF_SAMPLE_BRANCH_STACK;
>> +
>> + if (!(caps & ARCH_PEBS_AUX))
>> + x86_pmu.large_pebs_flags &= ~PERF_SAMPLE_DATA_SRC;
>> + if (!(caps & ARCH_PEBS_GPR)) {
>> + x86_pmu.large_pebs_flags &=
>> + ~(PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER);
>> + }
>> +}
>> +
>> #define counter_mask(_gp, _fixed) ((_gp) | ((u64)(_fixed) << INTEL_PMC_IDX_FIXED))
>>
>> static void update_pmu_cap(struct pmu *pmu)
>> @@ -5349,8 +5477,12 @@ static void update_pmu_cap(struct pmu *pmu)
>> hybrid(pmu, arch_pebs_cap).counters = pebs_mask;
>> hybrid(pmu, arch_pebs_cap).pdists = pdists_mask;
>>
>> - if (WARN_ON((pebs_mask | pdists_mask) & ~cntrs_mask))
>> + if (WARN_ON((pebs_mask | pdists_mask) & ~cntrs_mask)) {
>> x86_pmu.arch_pebs = 0;
>> + } else {
>> + __intel_update_pmu_caps(pmu);
>> + __intel_update_large_pebs_flags(pmu);
>> + }
>> } else {
>> WARN_ON(x86_pmu.arch_pebs == 1);
>> x86_pmu.arch_pebs = 0;
>> @@ -5514,6 +5646,8 @@ static void intel_pmu_cpu_starting(int cpu)
>> }
>> }
>>
>> + __intel_update_pmu_caps(cpuc->pmu);
>> +
>> if (!cpuc->shared_regs)
>> return;
>>
>> diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
>> index 1179980f795b..c66e9b562de3 100644
>> --- a/arch/x86/events/intel/ds.c
>> +++ b/arch/x86/events/intel/ds.c
>> @@ -1528,6 +1528,18 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
>> }
>> }
>>
>> +u64 intel_get_arch_pebs_data_config(struct perf_event *event)
>> +{
>> + u64 pebs_data_cfg = 0;
>> +
>> + if (WARN_ON(event->hw.idx < 0 || event->hw.idx >= X86_PMC_IDX_MAX))
>> + return 0;
>> +
>> + pebs_data_cfg |= pebs_update_adaptive_cfg(event);
>> +
>> + return pebs_data_cfg;
>> +}
>> +
>> void intel_pmu_pebs_add(struct perf_event *event)
>> {
>> struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
>> @@ -2947,6 +2959,11 @@ static void intel_pmu_drain_arch_pebs(struct pt_regs *iregs,
>>
>> index.wr = 0;
>> index.full = 0;
>> + index.en = 1;
>> + if (cpuc->n_pebs == cpuc->n_large_pebs)
>> + index.thresh = ARCH_PEBS_THRESH_MULTI;
>> + else
>> + index.thresh = ARCH_PEBS_THRESH_SINGLE;
>> wrmsrq(MSR_IA32_PEBS_INDEX, index.whole);
>>
>> mask = hybrid(cpuc->pmu, arch_pebs_cap).counters & cpuc->pebs_enabled;
>> diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
>> index 13f411bca6bc..3161ec0a3416 100644
>> --- a/arch/x86/events/perf_event.h
>> +++ b/arch/x86/events/perf_event.h
>> @@ -304,6 +304,8 @@ struct cpu_hw_events {
>> /* Intel ACR configuration */
>> u64 acr_cfg_b[X86_PMC_IDX_MAX];
>> u64 acr_cfg_c[X86_PMC_IDX_MAX];
>> + /* Cached CFG_C values */
>> + u64 cfg_c_val[X86_PMC_IDX_MAX];
>>
>> /*
>> * Intel LBR bits
>> @@ -1782,6 +1784,8 @@ void intel_pmu_pebs_data_source_cmt(void);
>>
>> void intel_pmu_pebs_data_source_lnl(void);
>>
>> +u64 intel_get_arch_pebs_data_config(struct perf_event *event);
>> +
>> int intel_pmu_setup_lbr_filter(struct perf_event *event);
>>
>> void intel_pt_interrupt(void);
>> diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h
>> index 023c2883f9f3..695f87efbeb8 100644
>> --- a/arch/x86/include/asm/intel_ds.h
>> +++ b/arch/x86/include/asm/intel_ds.h
>> @@ -7,6 +7,13 @@
>> #define PEBS_BUFFER_SHIFT 4
>> #define PEBS_BUFFER_SIZE (PAGE_SIZE << PEBS_BUFFER_SHIFT)
>>
>> +/*
>> + * The largest PEBS record could consume a page, ensure
>> + * a record at least can be written after triggering PMI.
>> + */
>> +#define ARCH_PEBS_THRESH_MULTI ((PEBS_BUFFER_SIZE - PAGE_SIZE) >> PEBS_BUFFER_SHIFT)
>> +#define ARCH_PEBS_THRESH_SINGLE 1
>> +
>> /* The maximal number of PEBS events: */
>> #define MAX_PEBS_EVENTS_FMT4 8
>> #define MAX_PEBS_EVENTS 32
>> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
>> index fc7a4e7c718d..f1ef9ac38bfb 100644
>> --- a/arch/x86/include/asm/msr-index.h
>> +++ b/arch/x86/include/asm/msr-index.h
>> @@ -333,6 +333,14 @@
>> #define ARCH_PEBS_OFFSET_MASK 0x7fffff
>> #define ARCH_PEBS_INDEX_WR_SHIFT 4
>>
>> +#define ARCH_PEBS_RELOAD 0xffffffff
>> +#define ARCH_PEBS_LBR_SHIFT 40
>> +#define ARCH_PEBS_LBR (0x3ull << ARCH_PEBS_LBR_SHIFT)
>> +#define ARCH_PEBS_VECR_XMM BIT_ULL(49)
>> +#define ARCH_PEBS_GPR BIT_ULL(61)
>> +#define ARCH_PEBS_AUX BIT_ULL(62)
>> +#define ARCH_PEBS_EN BIT_ULL(63)
>> +
>> #define MSR_IA32_RTIT_CTL 0x00000570
>> #define RTIT_CTL_TRACEEN BIT(0)
>> #define RTIT_CTL_CYCLEACC BIT(1)
>> --
>> 2.34.1
>>