Re: [Patch v9 11/12] perf/x86/intel: Setup PEBS data configuration and enable legacy groups

From: Ian Rogers

Date: Wed Mar 04 2026 - 20:21:25 EST


On Wed, Oct 29, 2025 at 3:25 AM Dapeng Mi <dapeng1.mi@xxxxxxxxxxxxxxx> wrote:
>
> Different from legacy PEBS, arch-PEBS provides per-counter PEBS data
> configuration by programming MSR IA32_PMC_GPx/FXx_CFG_C MSRs.
>
> This patch obtains the PEBS data configuration from the event attribute,
> then writes the PEBS data configuration to MSR IA32_PMC_GPx/FXx_CFG_C and
> enables the corresponding PEBS groups.
>
> Please note that this patch only enables XMM SIMD regs sampling for
> arch-PEBS, the other SIMD regs (OPMASK/YMM/ZMM) sampling on arch-PEBS
> would be supported after PMI based SIMD regs (OPMASK/YMM/ZMM) sampling
> is supported.
>
> Co-developed-by: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>
> Signed-off-by: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>
> Signed-off-by: Dapeng Mi <dapeng1.mi@xxxxxxxxxxxxxxx>
> ---
> arch/x86/events/intel/core.c | 136 ++++++++++++++++++++++++++++++-
> arch/x86/events/intel/ds.c | 17 ++++
> arch/x86/events/perf_event.h | 4 +
> arch/x86/include/asm/intel_ds.h | 7 ++
> arch/x86/include/asm/msr-index.h | 8 ++
> 5 files changed, 171 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
> index 40ccfd80d554..75cba28b86d5 100644
> --- a/arch/x86/events/intel/core.c
> +++ b/arch/x86/events/intel/core.c
> @@ -2563,6 +2563,45 @@ static void intel_pmu_disable_fixed(struct perf_event *event)
> cpuc->fixed_ctrl_val &= ~mask;
> }
>
> +static inline void __intel_pmu_update_event_ext(int idx, u64 ext)
> +{
> + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> + u32 msr;
> +
> + if (idx < INTEL_PMC_IDX_FIXED) {
> + msr = MSR_IA32_PMC_V6_GP0_CFG_C +
> + x86_pmu.addr_offset(idx, false);
> + } else {
> + msr = MSR_IA32_PMC_V6_FX0_CFG_C +
> + x86_pmu.addr_offset(idx - INTEL_PMC_IDX_FIXED, false);
> + }
> +
> + cpuc->cfg_c_val[idx] = ext;
> + wrmsrq(msr, ext);
> +}
> +
> +static void intel_pmu_disable_event_ext(struct perf_event *event)
> +{
> + if (!x86_pmu.arch_pebs)
> + return;
> +
> + /*
> + * Only clear CFG_C MSR for PEBS counter group events;
> + * this prevents the HW counter's value from being added
> + * into other PEBS records incorrectly after PEBS counter
> + * group events are disabled.
> + *
> + * For other events, it's unnecessary to clear CFG_C MSRs
> + * since CFG_C doesn't take effect if counter is in
> + * disabled state. That helps to reduce the WRMSR overhead
> + * in context switches.
> + */
> + if (!is_pebs_counter_event_group(event))
> + return;
> +
> + __intel_pmu_update_event_ext(event->hw.idx, 0);
> +}
> +
> static void intel_pmu_disable_event(struct perf_event *event)
> {
> struct hw_perf_event *hwc = &event->hw;
> @@ -2571,9 +2610,12 @@ static void intel_pmu_disable_event(struct perf_event *event)
> switch (idx) {
> case 0 ... INTEL_PMC_IDX_FIXED - 1:
> intel_clear_masks(event, idx);
> + intel_pmu_disable_event_ext(event);
> x86_pmu_disable_event(event);
> break;
> case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
> + intel_pmu_disable_event_ext(event);
> + fallthrough;
> case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
> intel_pmu_disable_fixed(event);
> break;
> @@ -2940,6 +2982,66 @@ static void intel_pmu_enable_acr(struct perf_event *event)
>
> DEFINE_STATIC_CALL_NULL(intel_pmu_enable_acr_event, intel_pmu_enable_acr);
>
> +static void intel_pmu_enable_event_ext(struct perf_event *event)
> +{
> + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> + struct hw_perf_event *hwc = &event->hw;
> + union arch_pebs_index old, new;
> + struct arch_pebs_cap cap;
> + u64 ext = 0;
> +
> + if (!x86_pmu.arch_pebs)
> + return;
> +
> + cap = hybrid(cpuc->pmu, arch_pebs_cap);
> +
> + if (event->attr.precise_ip) {
> + u64 pebs_data_cfg = intel_get_arch_pebs_data_config(event);
> +
> + ext |= ARCH_PEBS_EN;
> + if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD)
> + ext |= (-hwc->sample_period) & ARCH_PEBS_RELOAD;

Nit: Perhaps there should be a warning if "hwc->sample_period >
ARCH_PEBS_RELOAD"?

Thanks,
Ian

> +
> + if (pebs_data_cfg && cap.caps) {
> + if (pebs_data_cfg & PEBS_DATACFG_MEMINFO)
> + ext |= ARCH_PEBS_AUX & cap.caps;
> +
> + if (pebs_data_cfg & PEBS_DATACFG_GP)
> + ext |= ARCH_PEBS_GPR & cap.caps;
> +
> + if (pebs_data_cfg & PEBS_DATACFG_XMMS)
> + ext |= ARCH_PEBS_VECR_XMM & cap.caps;
> +
> + if (pebs_data_cfg & PEBS_DATACFG_LBRS)
> + ext |= ARCH_PEBS_LBR & cap.caps;
> + }
> +
> + if (cpuc->n_pebs == cpuc->n_large_pebs)
> + new.thresh = ARCH_PEBS_THRESH_MULTI;
> + else
> + new.thresh = ARCH_PEBS_THRESH_SINGLE;
> +
> + rdmsrq(MSR_IA32_PEBS_INDEX, old.whole);
> + if (new.thresh != old.thresh || !old.en) {
> + if (old.thresh == ARCH_PEBS_THRESH_MULTI && old.wr > 0) {
> + /*
> + * Large PEBS was enabled.
> + * Drain PEBS buffer before applying the single PEBS.
> + */
> + intel_pmu_drain_pebs_buffer();
> + } else {
> + new.wr = 0;
> + new.full = 0;
> + new.en = 1;
> + wrmsrq(MSR_IA32_PEBS_INDEX, new.whole);
> + }
> + }
> + }
> +
> + if (cpuc->cfg_c_val[hwc->idx] != ext)
> + __intel_pmu_update_event_ext(hwc->idx, ext);
> +}
> +
> static void intel_pmu_enable_event(struct perf_event *event)
> {
> u64 enable_mask = ARCH_PERFMON_EVENTSEL_ENABLE;
> @@ -2955,10 +3057,12 @@ static void intel_pmu_enable_event(struct perf_event *event)
> enable_mask |= ARCH_PERFMON_EVENTSEL_BR_CNTR;
> intel_set_masks(event, idx);
> static_call_cond(intel_pmu_enable_acr_event)(event);
> + intel_pmu_enable_event_ext(event);
> __x86_pmu_enable_event(hwc, enable_mask);
> break;
> case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
> static_call_cond(intel_pmu_enable_acr_event)(event);
> + intel_pmu_enable_event_ext(event);
> fallthrough;
> case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
> intel_pmu_enable_fixed(event);
> @@ -5301,6 +5405,30 @@ static inline bool intel_pmu_broken_perf_cap(void)
> return false;
> }
>
> +static inline void __intel_update_pmu_caps(struct pmu *pmu)
> +{
> + struct pmu *dest_pmu = pmu ? pmu : x86_get_pmu(smp_processor_id());
> +
> + if (hybrid(pmu, arch_pebs_cap).caps & ARCH_PEBS_VECR_XMM)
> + dest_pmu->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
> +}
> +
> +static inline void __intel_update_large_pebs_flags(struct pmu *pmu)
> +{
> + u64 caps = hybrid(pmu, arch_pebs_cap).caps;
> +
> + x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
> + if (caps & ARCH_PEBS_LBR)
> + x86_pmu.large_pebs_flags |= PERF_SAMPLE_BRANCH_STACK;
> +
> + if (!(caps & ARCH_PEBS_AUX))
> + x86_pmu.large_pebs_flags &= ~PERF_SAMPLE_DATA_SRC;
> + if (!(caps & ARCH_PEBS_GPR)) {
> + x86_pmu.large_pebs_flags &=
> + ~(PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER);
> + }
> +}
> +
> #define counter_mask(_gp, _fixed) ((_gp) | ((u64)(_fixed) << INTEL_PMC_IDX_FIXED))
>
> static void update_pmu_cap(struct pmu *pmu)
> @@ -5349,8 +5477,12 @@ static void update_pmu_cap(struct pmu *pmu)
> hybrid(pmu, arch_pebs_cap).counters = pebs_mask;
> hybrid(pmu, arch_pebs_cap).pdists = pdists_mask;
>
> - if (WARN_ON((pebs_mask | pdists_mask) & ~cntrs_mask))
> + if (WARN_ON((pebs_mask | pdists_mask) & ~cntrs_mask)) {
> x86_pmu.arch_pebs = 0;
> + } else {
> + __intel_update_pmu_caps(pmu);
> + __intel_update_large_pebs_flags(pmu);
> + }
> } else {
> WARN_ON(x86_pmu.arch_pebs == 1);
> x86_pmu.arch_pebs = 0;
> @@ -5514,6 +5646,8 @@ static void intel_pmu_cpu_starting(int cpu)
> }
> }
>
> + __intel_update_pmu_caps(cpuc->pmu);
> +
> if (!cpuc->shared_regs)
> return;
>
> diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
> index 1179980f795b..c66e9b562de3 100644
> --- a/arch/x86/events/intel/ds.c
> +++ b/arch/x86/events/intel/ds.c
> @@ -1528,6 +1528,18 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
> }
> }
>
> +u64 intel_get_arch_pebs_data_config(struct perf_event *event)
> +{
> + u64 pebs_data_cfg = 0;
> +
> + if (WARN_ON(event->hw.idx < 0 || event->hw.idx >= X86_PMC_IDX_MAX))
> + return 0;
> +
> + pebs_data_cfg |= pebs_update_adaptive_cfg(event);
> +
> + return pebs_data_cfg;
> +}
> +
> void intel_pmu_pebs_add(struct perf_event *event)
> {
> struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> @@ -2947,6 +2959,11 @@ static void intel_pmu_drain_arch_pebs(struct pt_regs *iregs,
>
> index.wr = 0;
> index.full = 0;
> + index.en = 1;
> + if (cpuc->n_pebs == cpuc->n_large_pebs)
> + index.thresh = ARCH_PEBS_THRESH_MULTI;
> + else
> + index.thresh = ARCH_PEBS_THRESH_SINGLE;
> wrmsrq(MSR_IA32_PEBS_INDEX, index.whole);
>
> mask = hybrid(cpuc->pmu, arch_pebs_cap).counters & cpuc->pebs_enabled;
> diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
> index 13f411bca6bc..3161ec0a3416 100644
> --- a/arch/x86/events/perf_event.h
> +++ b/arch/x86/events/perf_event.h
> @@ -304,6 +304,8 @@ struct cpu_hw_events {
> /* Intel ACR configuration */
> u64 acr_cfg_b[X86_PMC_IDX_MAX];
> u64 acr_cfg_c[X86_PMC_IDX_MAX];
> + /* Cached CFG_C values */
> + u64 cfg_c_val[X86_PMC_IDX_MAX];
>
> /*
> * Intel LBR bits
> @@ -1782,6 +1784,8 @@ void intel_pmu_pebs_data_source_cmt(void);
>
> void intel_pmu_pebs_data_source_lnl(void);
>
> +u64 intel_get_arch_pebs_data_config(struct perf_event *event);
> +
> int intel_pmu_setup_lbr_filter(struct perf_event *event);
>
> void intel_pt_interrupt(void);
> diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h
> index 023c2883f9f3..695f87efbeb8 100644
> --- a/arch/x86/include/asm/intel_ds.h
> +++ b/arch/x86/include/asm/intel_ds.h
> @@ -7,6 +7,13 @@
> #define PEBS_BUFFER_SHIFT 4
> #define PEBS_BUFFER_SIZE (PAGE_SIZE << PEBS_BUFFER_SHIFT)
>
> +/*
> + * The largest PEBS record could consume a page, ensure
> + * a record at least can be written after triggering PMI.
> + */
> +#define ARCH_PEBS_THRESH_MULTI ((PEBS_BUFFER_SIZE - PAGE_SIZE) >> PEBS_BUFFER_SHIFT)
> +#define ARCH_PEBS_THRESH_SINGLE 1
> +
> /* The maximal number of PEBS events: */
> #define MAX_PEBS_EVENTS_FMT4 8
> #define MAX_PEBS_EVENTS 32
> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
> index fc7a4e7c718d..f1ef9ac38bfb 100644
> --- a/arch/x86/include/asm/msr-index.h
> +++ b/arch/x86/include/asm/msr-index.h
> @@ -333,6 +333,14 @@
> #define ARCH_PEBS_OFFSET_MASK 0x7fffff
> #define ARCH_PEBS_INDEX_WR_SHIFT 4
>
> +#define ARCH_PEBS_RELOAD 0xffffffff
> +#define ARCH_PEBS_LBR_SHIFT 40
> +#define ARCH_PEBS_LBR (0x3ull << ARCH_PEBS_LBR_SHIFT)
> +#define ARCH_PEBS_VECR_XMM BIT_ULL(49)
> +#define ARCH_PEBS_GPR BIT_ULL(61)
> +#define ARCH_PEBS_AUX BIT_ULL(62)
> +#define ARCH_PEBS_EN BIT_ULL(63)
> +
> #define MSR_IA32_RTIT_CTL 0x00000570
> #define RTIT_CTL_TRACEEN BIT(0)
> #define RTIT_CTL_CYCLEACC BIT(1)
> --
> 2.34.1
>