Re: [PATCH v4 21/38] KVM: x86/pmu/vmx: Save/load guest IA32_PERF_GLOBAL_CTRL with vm_exit/entry_ctrl

From: Chen, Zide
Date: Wed Mar 26 2025 - 12:55:52 EST

On 3/24/2025 10:31 AM, Mingwei Zhang wrote:
> From: Dapeng Mi <dapeng1.mi@xxxxxxxxxxxxxxx>
>
> Intel processors (VMX) provide the capability to save/load the guest
> IA32_PERF_GLOBAL_CTRL at VM-exit/VM-entry by setting the
> VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL bit in the VM-exit controls or the
> VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL bit in the VM-entry controls.
>
> Mediated vPMU leverages both capabilities to save/load the guest
> IA32_PERF_GLOBAL_CTRL automatically at VM-exit/VM-entry. Note that
> VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL was only introduced on Sapphire
> Rapids and later Intel CPUs.
>
> If VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL is unavailable, the mediated PMU is
> disabled. Note that the mediated PMU could instead be enabled by falling
> back to the atomic MSR save/restore list, but that would add extra
> overhead on every VM-entry/exit.
>
> Since these VMX capability bits automatically save/restore the PMU global
> ctrl between the VMCS and the HW MSR, no synchronization is performed
> between the HW MSR and pmu->global_ctrl, the KVM-cached value. Therefore,
> whenever KVM needs to use this variable, it must explicitly read the value
> from the MSR into pmu->global_ctrl. This is especially true when the guest
> doesn't own all PMU counters, i.e., when IA32_PERF_GLOBAL_CTRL is
> intercepted by the mediated PMU.
>
> Suggested-by: Sean Christopherson <seanjc@xxxxxxxxxx>
> Signed-off-by: Dapeng Mi <dapeng1.mi@xxxxxxxxxxxxxxx>
> Co-developed-by: Mingwei Zhang <mizhang@xxxxxxxxxx>
> Signed-off-by: Mingwei Zhang <mizhang@xxxxxxxxxx>
> ---
> arch/x86/include/asm/kvm_host.h | 4 ++++
> arch/x86/include/asm/vmx.h | 1 +
> arch/x86/kvm/pmu.c | 30 ++++++++++++++++++++++++-
> arch/x86/kvm/vmx/capabilities.h | 5 +++++
> arch/x86/kvm/vmx/nested.c | 3 ++-
> arch/x86/kvm/vmx/pmu_intel.c | 39 ++++++++++++++++++++++++++++++++-
> arch/x86/kvm/vmx/vmx.c | 22 ++++++++++++++++++-
> arch/x86/kvm/vmx/vmx.h | 3 ++-
> 8 files changed, 102 insertions(+), 5 deletions(-)
>
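To restate the invariant in code for anyone skimming: under the mediated
vPMU, any consumer of pmu->global_ctrl has to refresh the cached value from
the VMCS first, roughly like this (untested sketch, mirroring the
intel_pmu_get_msr() hunk below):

	/*
	 * pmu->global_ctrl can be stale after VM-exit: hardware saved the
	 * guest value into GUEST_IA32_PERF_GLOBAL_CTRL, not into KVM's cache.
	 */
	if (kvm_mediated_pmu_enabled(vcpu))
		pmu->global_ctrl = vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL);
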
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 0b7af5902ff7..4b3bfefc2d05 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -553,6 +553,10 @@ struct kvm_pmu {
> unsigned available_event_types;
> u64 fixed_ctr_ctrl;
> u64 fixed_ctr_ctrl_rsvd;
> + /*
> + * kvm_pmu_sync_global_ctrl_from_vmcs() must be called to update
> + * this SW-maintained global_ctrl for mediated vPMU before accessing it.
> + */
> u64 global_ctrl;
> u64 global_status;
> u64 counter_bitmask[2];
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index f7fd4369b821..48e137560f17 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -106,6 +106,7 @@
> #define VM_EXIT_CLEAR_BNDCFGS 0x00800000
> #define VM_EXIT_PT_CONCEAL_PIP 0x01000000
> #define VM_EXIT_CLEAR_IA32_RTIT_CTL 0x02000000
> +#define VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL 0x40000000
>
> #define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff
>
> diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
> index 6ad71752be4b..4e8cefcce7ab 100644
> --- a/arch/x86/kvm/pmu.c
> +++ b/arch/x86/kvm/pmu.c
> @@ -646,6 +646,30 @@ void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
> }
> }
>
> +static void kvm_pmu_sync_global_ctrl_from_vmcs(struct kvm_vcpu *vcpu)
> +{
> + struct msr_data msr_info = { .index = MSR_CORE_PERF_GLOBAL_CTRL };
> +
> + if (!kvm_mediated_pmu_enabled(vcpu))
> + return;
> +
> + /* Sync pmu->global_ctrl from GUEST_IA32_PERF_GLOBAL_CTRL. */
> + kvm_pmu_call(get_msr)(vcpu, &msr_info);
> +}
> +
> +static void kvm_pmu_sync_global_ctrl_to_vmcs(struct kvm_vcpu *vcpu, u64 global_ctrl)
> +{
> + struct msr_data msr_info = {
> + .index = MSR_CORE_PERF_GLOBAL_CTRL,
> + .data = global_ctrl };
> +
> + if (!kvm_mediated_pmu_enabled(vcpu))
> + return;
> +
> + /* Sync pmu->global_ctrl to GUEST_IA32_PERF_GLOBAL_CTRL. */
> + kvm_pmu_call(set_msr)(vcpu, &msr_info);
> +}
> +
> bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
> {
> switch (msr) {
> @@ -680,7 +704,6 @@ int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> msr_info->data = pmu->global_status;
> break;
> case MSR_AMD64_PERF_CNTR_GLOBAL_CTL:
> - case MSR_CORE_PERF_GLOBAL_CTRL:
> msr_info->data = pmu->global_ctrl;
> break;
> case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
> @@ -731,6 +754,9 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)


pmu->global_ctrl doesn't always hold the up-to-date guest value; it needs
to be synced from the VMCS/VMCB before comparing it against 'data':

+		kvm_pmu_sync_global_ctrl_from_vmcs(vcpu);
		if (pmu->global_ctrl != data) {

> diff = pmu->global_ctrl ^ data;
> pmu->global_ctrl = data;
> reprogram_counters(pmu, diff);
> +
> + /* Propagate guest global_ctrl to GUEST_IA32_PERF_GLOBAL_CTRL. */
> + kvm_pmu_sync_global_ctrl_to_vmcs(vcpu, data);
> }
> break;
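
To make the placement concrete, with the suggestion above applied the
MSR_CORE_PERF_GLOBAL_CTRL handling in kvm_pmu_set_msr() would look roughly
like this (untested sketch, pre-existing validity checks elided as "..."):

	case MSR_CORE_PERF_GLOBAL_CTRL:
		...
		/* Refresh the cache; hardware wrote the guest value at VM-exit. */
		kvm_pmu_sync_global_ctrl_from_vmcs(vcpu);
		if (pmu->global_ctrl != data) {
			diff = pmu->global_ctrl ^ data;
			pmu->global_ctrl = data;
			reprogram_counters(pmu, diff);

			/* Propagate guest global_ctrl to GUEST_IA32_PERF_GLOBAL_CTRL. */
			kvm_pmu_sync_global_ctrl_to_vmcs(vcpu, data);
		}
		break;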
> case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
> @@ -907,6 +933,8 @@ void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 eventsel)
>
> BUILD_BUG_ON(sizeof(pmu->global_ctrl) * BITS_PER_BYTE != X86_PMC_IDX_MAX);
>
> + kvm_pmu_sync_global_ctrl_from_vmcs(vcpu);
> +
> if (!kvm_pmu_has_perf_global_ctrl(pmu))
> bitmap_copy(bitmap, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX);
> else if (!bitmap_and(bitmap, pmu->all_valid_pmc_idx,
> diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
> index 013536fde10b..cc63bd4ab87c 100644
> --- a/arch/x86/kvm/vmx/capabilities.h
> +++ b/arch/x86/kvm/vmx/capabilities.h
> @@ -101,6 +101,11 @@ static inline bool cpu_has_load_perf_global_ctrl(void)
> return vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
> }
>
> +static inline bool cpu_has_save_perf_global_ctrl(void)
> +{
> + return vmcs_config.vmexit_ctrl & VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL;
> +}
> +
> static inline bool cpu_has_vmx_mpx(void)
> {
> return vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS;
> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index 8a7af02d466e..ecf72394684d 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -7004,7 +7004,8 @@ static void nested_vmx_setup_exit_ctls(struct vmcs_config *vmcs_conf,
> VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
> VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
> VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT |
> - VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
> + VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL |
> + VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL;
>
> /* We support free control of debug control saving. */
> msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS;
> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> index 2a5f79206b02..04a893e56135 100644
> --- a/arch/x86/kvm/vmx/pmu_intel.c
> +++ b/arch/x86/kvm/vmx/pmu_intel.c
> @@ -294,6 +294,11 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> u32 msr = msr_info->index;
>
> switch (msr) {
> + case MSR_CORE_PERF_GLOBAL_CTRL:
> + if (kvm_mediated_pmu_enabled(vcpu))
> + pmu->global_ctrl = vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL);
> + msr_info->data = pmu->global_ctrl;
> + break;
> case MSR_CORE_PERF_FIXED_CTR_CTRL:
> msr_info->data = pmu->fixed_ctr_ctrl;
> break;
> @@ -339,6 +344,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> u64 reserved_bits, diff;
>
> switch (msr) {
> + case MSR_CORE_PERF_GLOBAL_CTRL:
> + if (kvm_mediated_pmu_enabled(vcpu))
> + vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL,
> + pmu->global_ctrl);
> + break;
> case MSR_CORE_PERF_FIXED_CTR_CTRL:
> if (data & pmu->fixed_ctr_ctrl_rsvd)
> return 1;
> @@ -558,10 +568,37 @@ static void __intel_pmu_refresh(struct kvm_vcpu *vcpu)
>
> static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
> {
> + struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> + bool mediated;
> +
> __intel_pmu_refresh(vcpu);
>
> - exec_controls_changebit(to_vmx(vcpu), CPU_BASED_RDPMC_EXITING,
> + exec_controls_changebit(vmx, CPU_BASED_RDPMC_EXITING,
> !kvm_rdpmc_in_guest(vcpu));
> +
> + mediated = kvm_mediated_pmu_enabled(vcpu);
> + if (cpu_has_load_perf_global_ctrl()) {
> + vm_entry_controls_changebit(vmx,
> + VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, mediated);
> + /*
> + * Initialize guest PERF_GLOBAL_CTRL to the reset value per the SDM.
> + *
> + * Note: GUEST_IA32_PERF_GLOBAL_CTRL must be initialized to
> + * "BIT_ULL(pmu->nr_arch_gp_counters) - 1" instead of pmu->global_ctrl
> + * since pmu->global_ctrl is only initialized when the guest
> + * pmu->version > 1. Otherwise, if pmu->version is 1, pmu->global_ctrl
> + * is 0 and the guest counters are never actually enabled.
> + */
> + if (mediated)
> + vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL,
> + BIT_ULL(pmu->nr_arch_gp_counters) - 1);
> + }
> +
> + if (cpu_has_save_perf_global_ctrl())
> + vm_exit_controls_changebit(vmx,
> + VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL |
> + VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL, mediated);
> }
>
> static void intel_pmu_init(struct kvm_vcpu *vcpu)
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index ff66f17d6358..38ecf3c116bd 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -4390,6 +4390,13 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
>
> if (cpu_has_load_ia32_efer())
> vmcs_write64(HOST_IA32_EFER, kvm_host.efer);
> +
> + /*
> + * Initialize host PERF_GLOBAL_CTRL to 0 to disable all counters
> + * immediately upon VM-exit. The mediated vPMU then calls perf_guest_exit()
> + * to re-enable host perf events.
> + */
> + vmcs_write64(HOST_IA32_PERF_GLOBAL_CTRL, 0);
> }
>
> void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
> @@ -4457,7 +4464,8 @@ static u32 vmx_get_initial_vmexit_ctrl(void)
> VM_EXIT_CLEAR_IA32_RTIT_CTL);
> /* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */
> return vmexit_ctrl &
> - ~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER);
> + ~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER |
> + VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL);
> }
>
> void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
> @@ -7196,6 +7204,9 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
> struct perf_guest_switch_msr *msrs;
> struct kvm_pmu *pmu = vcpu_to_pmu(&vmx->vcpu);
>
> + if (kvm_mediated_pmu_enabled(&vmx->vcpu))
> + return;
> +
> pmu->host_cross_mapped_mask = 0;
> if (pmu->pebs_enable & pmu->global_ctrl)
> intel_pmu_cross_mapped_check(pmu);
> @@ -8451,6 +8462,15 @@ __init int vmx_hardware_setup(void)
> enable_sgx = false;
> #endif
>
> + /*
> + * All CPUs that support a mediated PMU are expected to support loading
> + * and saving PERF_GLOBAL_CTRL via dedicated VMCS fields.
> + */
> + if (enable_mediated_pmu &&
> + (WARN_ON_ONCE(!cpu_has_load_perf_global_ctrl() ||
> + !cpu_has_save_perf_global_ctrl())))
> + enable_mediated_pmu = false;
> +
> /*
> * set_apic_access_page_addr() is used to reload apic access
> * page upon invalidation. No need to do anything if not
> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> index 5c505af553c8..b282165f98a6 100644
> --- a/arch/x86/kvm/vmx/vmx.h
> +++ b/arch/x86/kvm/vmx/vmx.h
> @@ -510,7 +510,8 @@ static inline u8 vmx_get_rvi(void)
> VM_EXIT_LOAD_IA32_EFER | \
> VM_EXIT_CLEAR_BNDCFGS | \
> VM_EXIT_PT_CONCEAL_PIP | \
> - VM_EXIT_CLEAR_IA32_RTIT_CTL)
> + VM_EXIT_CLEAR_IA32_RTIT_CTL | \
> + VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL)
>
> #define KVM_REQUIRED_VMX_PIN_BASED_VM_EXEC_CONTROL \
> (PIN_BASED_EXT_INTR_MASK | \