[PATCH V2 2/3] perf/x86/intel: Add the enumeration and flag for the auto counter reload

From: kan . liang
Date: Thu Oct 10 2024 - 15:28:00 EST


From: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>

The counters that support the auto counter reload feature can be
enumerated in the CPUID Leaf 0x23 sub-leaf 0x2.

Add acr_cntr_mask to store the mask of counters which are reloadable.
Add acr_cntr_cause_mask to store the mask of counters which can cause
reload. Since the e-core and p-core may have different numbers of
counters, track the masks in the struct x86_hybrid_pmu as well.

The auto counter reload feature requires a dynamic constraint. Add a PMU
flag, PMU_FL_ACR, so that the constraint_list is allocated when the
feature is enumerated.

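Several existing features (PMU_FL_EXCL_CNTRS, PMU_FL_TFA and
PMU_FL_BR_CNTR) require a dynamic constraint as well. Add
PMU_FL_DYN_MASK to group the flags of all such features, so that the
allocation check in intel_cpuc_prepare() tests a single mask.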

Signed-off-by: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>
---
arch/x86/events/intel/core.c | 17 +++++++++++++++--
arch/x86/events/perf_event.h | 12 ++++++++++++
arch/x86/include/asm/perf_event.h | 2 ++
3 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 2f3bf3bbbd77..726ef13c2c81 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4775,7 +4775,8 @@ static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu)
return c;
}

-
+#define PMU_FL_DYN_MASK (PMU_FL_EXCL_CNTRS | PMU_FL_TFA | \
+ PMU_FL_BR_CNTR | PMU_FL_ACR)
int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu)
{
cpuc->pebs_record_size = x86_pmu.pebs_record_size;
@@ -4786,7 +4787,7 @@ int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu)
goto err;
}

- if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA | PMU_FL_BR_CNTR)) {
+ if (x86_pmu.flags & PMU_FL_DYN_MASK) {
size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);

cpuc->constraint_list = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
@@ -4893,6 +4894,18 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu)
pmu->fixed_cntr_mask64 = ebx;
}

+ if (sub_bitmaps & ARCH_PERFMON_ACR_LEAF) {
+ cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_ACR_LEAF_BIT,
+ &eax, &ebx, &ecx, &edx);
+ /* The mask of the counters which can be reloaded */
+ pmu->acr_cntr_mask64 = eax | ((u64)ebx << INTEL_PMC_IDX_FIXED);
+
+ /* The mask of the counters which can cause a reload of reloadable counters */
+ pmu->acr_cntr_cause_mask = ecx | ((u64)edx << INTEL_PMC_IDX_FIXED);
+
+ x86_pmu.flags |= PMU_FL_ACR;
+ }
+
if (!intel_pmu_broken_perf_cap()) {
/* Perf Metric (Bit 15) and PEBS via PT (Bit 16) are hybrid enumeration */
rdmsrl(MSR_IA32_PERF_CAPABILITIES, pmu->intel_cap.capabilities);
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 82c6f45ce975..1ee6d7bb10a3 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -718,6 +718,12 @@ struct x86_hybrid_pmu {
u64 fixed_cntr_mask64;
unsigned long fixed_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
};
+
+ union {
+ u64 acr_cntr_mask64;
+ unsigned long acr_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ };
+ u64 acr_cntr_cause_mask;
struct event_constraint unconstrained;

u64 hw_cache_event_ids
@@ -815,6 +821,11 @@ struct x86_pmu {
u64 fixed_cntr_mask64;
unsigned long fixed_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
};
+ union {
+ u64 acr_cntr_mask64;
+ unsigned long acr_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ };
+ u64 acr_cntr_cause_mask;
int cntval_bits;
u64 cntval_mask;
union {
@@ -1059,6 +1070,7 @@ do { \
#define PMU_FL_MEM_LOADS_AUX 0x100 /* Require an auxiliary event for the complete memory info */
#define PMU_FL_RETIRE_LATENCY 0x200 /* Support Retire Latency in PEBS */
#define PMU_FL_BR_CNTR 0x400 /* Support branch counter logging */
+#define PMU_FL_ACR 0x800 /* Support auto-counter reload */

#define EVENT_VAR(_id) event_attr_##_id
#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 41ace8431e01..19af3d857db3 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -191,6 +191,8 @@ union cpuid10_edx {
#define ARCH_PERFMON_EXT_EQ 0x2
#define ARCH_PERFMON_NUM_COUNTER_LEAF_BIT 0x1
#define ARCH_PERFMON_NUM_COUNTER_LEAF BIT(ARCH_PERFMON_NUM_COUNTER_LEAF_BIT)
+#define ARCH_PERFMON_ACR_LEAF_BIT 0x2
+#define ARCH_PERFMON_ACR_LEAF BIT(ARCH_PERFMON_ACR_LEAF_BIT)

/*
* Intel Architectural LBR CPUID detection/enumeration details:
--
2.38.1