Re: [RFC PATCH 26/41] KVM: x86/pmu: Add host_perf_cap field in kvm_caps to record host PMU capability

From: Sean Christopherson
Date: Thu Apr 11 2024 - 17:49:21 EST


On Fri, Jan 26, 2024, Xiong Zhang wrote:
> From: Mingwei Zhang <mizhang@xxxxxxxxxx>
>
> Add host_perf_cap field in kvm_caps to record host PMU capability. This
> helps KVM recognize the PMU capability difference between host and guest.
> This awareness improves performance in PMU context switch. In particular,
> KVM will need to zero out all MSRs that guest PMU does not use but host PMU
> does use. Having the host PMU feature set cached in host_perf_cap in
> kvm_caps structure saves a rdmsrl() to IA32_PERF_CAPABILITY MSR on each PMU
> context switch. In addition, this is more convenient approach than open
> another API on the host perf subsystem side.
>
> Signed-off-by: Mingwei Zhang <mizhang@xxxxxxxxxx>
> ---
> arch/x86/kvm/vmx/vmx.c | 17 +++++++++--------
> arch/x86/kvm/x86.h | 1 +
> 2 files changed, 10 insertions(+), 8 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 349954f90fe9..50100954cd92 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -7896,32 +7896,33 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
> vmx_update_exception_bitmap(vcpu);
> }
>
> -static u64 vmx_get_perf_capabilities(void)
> +static void vmx_get_perf_capabilities(void)
> {
> u64 perf_cap = PMU_CAP_FW_WRITES;
> struct x86_pmu_lbr lbr;
> - u64 host_perf_cap = 0;
> +
> + kvm_caps.host_perf_cap = 0;
>
> if (!enable_pmu)
> - return 0;
> + return;
>
> if (boot_cpu_has(X86_FEATURE_PDCM))
> - rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap);
> + rdmsrl(MSR_IA32_PERF_CAPABILITIES, kvm_caps.host_perf_cap);

I would strongly prefer that KVM snapshot the host's MSR_IA32_PERF_CAPABILITIES
if the CPU has PDCM, i.e. not leave it zero if the PMU is disabled.

>
> if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR) &&
> !enable_passthrough_pmu) {
> x86_perf_get_lbr(&lbr);
> if (lbr.nr)
> - perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT;
> + perf_cap |= kvm_caps.host_perf_cap & PMU_CAP_LBR_FMT;
> }
>
> if (vmx_pebs_supported() && !enable_passthrough_pmu) {
> - perf_cap |= host_perf_cap & PERF_CAP_PEBS_MASK;
> + perf_cap |= kvm_caps.host_perf_cap & PERF_CAP_PEBS_MASK;
> if ((perf_cap & PERF_CAP_PEBS_FORMAT) < 4)
> perf_cap &= ~PERF_CAP_PEBS_BASELINE;
> }
>
> - return perf_cap;
> + kvm_caps.supported_perf_cap = perf_cap;
> }
>
> static __init void vmx_set_cpu_caps(void)
> @@ -7946,7 +7947,7 @@ static __init void vmx_set_cpu_caps(void)
>
> if (!enable_pmu)
> kvm_cpu_cap_clear(X86_FEATURE_PDCM);
> - kvm_caps.supported_perf_cap = vmx_get_perf_capabilities();
> + vmx_get_perf_capabilities();
>
> if (!enable_sgx) {
> kvm_cpu_cap_clear(X86_FEATURE_SGX);
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 38b73e98eae9..a29eb0469d7e 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -28,6 +28,7 @@ struct kvm_caps {
> u64 supported_mce_cap;
> u64 supported_xcr0;
> u64 supported_xss;
> + u64 host_perf_cap;
> u64 supported_perf_cap;

This is confusing; host_perf_cap doesn't track "capabilities" so much as it tracks
a raw host value. Luckily, I have a series that I am going to post this week
that adds another struct for tracking host values, e.g. host_xss, host_efer, etc.

> };
>
> --
> 2.34.1
>