[PATCH v6 2/8] KVM: x86/pmu: Support Intel fixed counter 3 on mediated vPMU

From: Zide Chen

Date: Mon Jun 29 2026 - 19:31:06 EST


From: Dapeng Mi <dapeng1.mi@xxxxxxxxxxxxxxx>

Starting with Ice Lake, Intel introduced fixed counter 3, which counts
TOPDOWN.SLOTS - the number of available slots for an unhalted logical
processor. It serves as the denominator for top-level metrics in the
Top-down Microarchitecture Analysis method.

Emulating this counter on legacy vPMU would require introducing a new
generic perf encoding for the Intel-specific TOPDOWN.SLOTS event in
order to call perf_get_hw_event_config(). This is undesirable as it
would pollute the generic perf event encoding.

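Moreover, KVM does not intend to emulate IA32_PERF_METRICS in the
legacy vPMU model, and without IA32_PERF_METRICS, emulating this
counter has little practical value. Therefore, expose fixed counter
3 to guests only when mediated vPMU is enabled.

Since KVM doesn't support non-contiguous fixed counters, also limit
the advertised number of fixed counters on hosts with PMU version 5
or later to the fixed counter count reported in CPUID.0xA EDX.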

Signed-off-by: Dapeng Mi <dapeng1.mi@xxxxxxxxxxxxxxx>
Co-developed-by: Zide Chen <zide.chen@xxxxxxxxx>
Signed-off-by: Zide Chen <zide.chen@xxxxxxxxx>
---
v6:
- Update comments to replace 2 with KVM_MAX_NR_INTEL_FIXED_COUNTERS - 1.
v3:
- Move the non-contiguous counter filter code to pmu.c
v2:
- Don't advertise fixed counter 3 to userspace if the host doesn't
support it.
---
arch/x86/include/asm/kvm_host.h | 2 +-
arch/x86/kvm/msrs.c | 4 ++--
arch/x86/kvm/pmu.c | 18 +++++++++++++++++-
3 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d8700eb848b4..dc9e4e8bfc07 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -609,7 +609,7 @@ struct kvm_pmc {
#define KVM_MAX_NR_GP_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_GP_COUNTERS, \
KVM_MAX_NR_AMD_GP_COUNTERS)

-#define KVM_MAX_NR_INTEL_FIXED_COUNTERS 3
+#define KVM_MAX_NR_INTEL_FIXED_COUNTERS 4
#define KVM_MAX_NR_AMD_FIXED_COUNTERS 0
#define KVM_MAX_NR_FIXED_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_FIXED_COUNTERS, \
KVM_MAX_NR_AMD_FIXED_COUNTERS)
diff --git a/arch/x86/kvm/msrs.c b/arch/x86/kvm/msrs.c
index c230b18d87e3..3bf42d90ad14 100644
--- a/arch/x86/kvm/msrs.c
+++ b/arch/x86/kvm/msrs.c
@@ -228,7 +228,7 @@ static const u32 msrs_to_save_base[] = {

static const u32 msrs_to_save_pmu[] = {
MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1,
- MSR_ARCH_PERFMON_FIXED_CTR0 + 2,
+ MSR_ARCH_PERFMON_FIXED_CTR2, MSR_ARCH_PERFMON_FIXED_CTR3,
MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS,
MSR_CORE_PERF_GLOBAL_CTRL,
MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG,
@@ -2688,7 +2688,7 @@ void kvm_init_msr_lists(void)
{
unsigned i;

- BUILD_BUG_ON_MSG(KVM_MAX_NR_FIXED_COUNTERS != 3,
+ BUILD_BUG_ON_MSG(KVM_MAX_NR_FIXED_COUNTERS != 4,
"Please update the fixed PMCs in msrs_to_save_pmu[]");

num_msrs_to_save = 0;
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 62d0ed99ebe9..f82ba63767d0 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -99,7 +99,8 @@ static const struct x86_cpu_id vmx_pebs_pdist_cpu[] = {
* all perf counters (both gp and fixed). The mapping relationship
* between pmc and perf counters is as the following:
* * Intel: [0 .. KVM_MAX_NR_INTEL_GP_COUNTERS-1] <=> gp counters
- * [KVM_FIXED_PMC_BASE_IDX .. KVM_FIXED_PMC_BASE_IDX + 2] <=> fixed
+ * [KVM_FIXED_PMC_BASE_IDX .. KVM_FIXED_PMC_BASE_IDX +
+ * KVM_MAX_NR_INTEL_FIXED_COUNTERS - 1] <=> fixed
* * AMD: [0 .. AMD64_NUM_COUNTERS-1] and, for families 15H
* and later, [0 .. AMD64_NUM_COUNTERS_CORE-1] <=> gp counters
*/
@@ -134,6 +135,8 @@ void kvm_init_pmu_capability(struct kvm_pmu_ops *pmu_ops)
{
bool is_intel = boot_cpu_data.x86_vendor == X86_VENDOR_INTEL;
int min_nr_gp_ctrs = pmu_ops->MIN_NR_GP_COUNTERS;
+ union cpuid10_edx edx;
+ u32 eax, ebx, ecx;

/*
* Hybrid PMUs don't play nice with virtualization without careful
@@ -181,6 +184,19 @@ void kvm_init_pmu_capability(struct kvm_pmu_ops *pmu_ops)
kvm_pmu_cap.num_counters_fixed = min(kvm_pmu_cap.num_counters_fixed,
KVM_MAX_NR_FIXED_COUNTERS);

+ /*
+ * KVM doesn't support non-contiguous fixed counters. For PMU v5+, cap
+ * the count at CPUID.0xA EDX's value so only contiguous ones are kept.
+ */
+ if (kvm_host_pmu.version >= 5) {
+ cpuid(0xa, &eax, &ebx, &ecx, &edx.full);
+ if (kvm_pmu_cap.num_counters_fixed > edx.split.num_counters_fixed)
+ kvm_pmu_cap.num_counters_fixed = edx.split.num_counters_fixed;
+ }
+
+ if (!enable_mediated_pmu && kvm_pmu_cap.num_counters_fixed > 3)
+ kvm_pmu_cap.num_counters_fixed = 3;
+
kvm_pmu_eventsel.INSTRUCTIONS_RETIRED =
perf_get_hw_event_config(PERF_COUNT_HW_INSTRUCTIONS);
kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED =
--
2.54.0