Re: [PATCH] KVM: x86/pmu: Add Intel PMU supported fixed counters bit mask
From: Sean Christopherson
Date: Fri Mar 24 2023 - 18:46:48 EST
On Tue, Mar 21, 2023, Like Xu wrote:
> From: Like Xu <likexu@xxxxxxxxxxx>
>
> Per Intel SDM, fixed-function performance counter 'i' is supported if:
>
> FxCtr[i]_is_supported := ECX[i] || (EDX[4:0] > i);
>
> which means that KVM userspace can use EDX to limit the number of fixed
> counters while using ECX to enable a subset of the other KVM-supported
> fixed counters.
>
> Add a bitmap (instead of always checking the vCPU's CPUID entries) to keep
> track of the fixed counters available to the guest and to perform the
> semantic checks.
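For context, populating the new bitmap per the SDM formula above would
presumably look something like this in intel_pmu_refresh() (illustrative
only, that hunk is elided below; assumes the usual 'entry'/'edx' locals from
the CPUID.0xA lookup):

	/*
	 * Per the SDM, fixed counter i is available if CPUID.0xA.ECX[i] is
	 * set or if i is below CPUID.0xA.EDX[4:0].
	 */
	for (i = 0; i < kvm_pmu_cap.num_counters_fixed; i++) {
		if ((entry->ecx & BIT(i)) || edx.split.num_counters_fixed > i)
			set_bit(i, pmu->supported_fixed_pmc_idx);
	}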
>
> Signed-off-by: Like Xu <likexu@xxxxxxxxxxx>
> ---
> arch/x86/include/asm/kvm_host.h | 2 ++
> arch/x86/kvm/pmu.h | 8 +++++
> arch/x86/kvm/vmx/pmu_intel.c | 53 +++++++++++++++++++++------------
> 3 files changed, 44 insertions(+), 19 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index a45de1118a42..14689e583127 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -565,6 +565,8 @@ struct kvm_pmu {
> */
> bool need_cleanup;
>
> + DECLARE_BITMAP(supported_fixed_pmc_idx, KVM_PMC_MAX_FIXED);
Why add a new bitmap? all_valid_pmc_idx is a strict superset, no?
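E.g. (untested sketch), since fixed counter @idx lands at bit
INTEL_PMC_IDX_FIXED + idx in all_valid_pmc_idx, couldn't the helper simply be:

static inline bool fixed_ctr_is_supported(struct kvm_pmu *pmu, unsigned int idx)
{
	/* Fixed counters occupy bits INTEL_PMC_IDX_FIXED + i in all_valid_pmc_idx. */
	return test_bit(INTEL_PMC_IDX_FIXED + idx, pmu->all_valid_pmc_idx);
}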
> +static inline bool fixed_ctr_is_supported(struct kvm_pmu *pmu, unsigned int idx)
> +{
> + return test_bit(idx, pmu->supported_fixed_pmc_idx);
> +}
> +
> /* returns fixed PMC with the specified MSR */
> static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr)
> {
> @@ -120,6 +125,9 @@ static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr)
> u32 index = array_index_nospec(msr - base,
> pmu->nr_arch_fixed_counters);
>
> + if (!fixed_ctr_is_supported(pmu, index))
> + return NULL;
> +
> return &pmu->fixed_counters[index];
> }
>
> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> index e8a3be0b9df9..12f4b2fe7756 100644
> --- a/arch/x86/kvm/vmx/pmu_intel.c
> +++ b/arch/x86/kvm/vmx/pmu_intel.c
> @@ -43,13 +43,16 @@ static int fixed_pmc_events[] = {1, 0, 7};
> static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
> {
> struct kvm_pmc *pmc;
> - u8 old_fixed_ctr_ctrl = pmu->fixed_ctr_ctrl;
> + u8 new_ctrl, old_ctrl, old_fixed_ctr_ctrl = pmu->fixed_ctr_ctrl;
> int i;
>
> pmu->fixed_ctr_ctrl = data;
> for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
> - u8 new_ctrl = fixed_ctrl_field(data, i);
> - u8 old_ctrl = fixed_ctrl_field(old_fixed_ctr_ctrl, i);
Please keep the variable declarations in the loop (I've had to eat my words about
variable shadowing), e.g. to prevent accessing new_ctrl or old_ctrl after the loop.
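I.e. something like (untested):

	for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
		u8 new_ctrl, old_ctrl;

		/* Skip fixed counters the guest can't use. */
		if (!fixed_ctr_is_supported(pmu, i))
			continue;

		new_ctrl = fixed_ctrl_field(data, i);
		old_ctrl = fixed_ctrl_field(old_fixed_ctr_ctrl, i);

		if (old_ctrl == new_ctrl)
			continue;
		...
	}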
> + if (!fixed_ctr_is_supported(pmu, i))
> + continue;
> +
> + new_ctrl = fixed_ctrl_field(data, i);
> + old_ctrl = fixed_ctrl_field(old_fixed_ctr_ctrl, i);
>
> if (old_ctrl == new_ctrl)
> continue;
> @@ -125,6 +128,9 @@ static bool intel_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
>
> idx &= ~(3u << 30);
>
> + if (fixed && !fixed_ctr_is_supported(pmu, idx))
> + return false;
> +
> return fixed ? idx < pmu->nr_arch_fixed_counters
> : idx < pmu->nr_arch_gp_counters;
> }
> @@ -145,7 +151,7 @@ static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
> counters = pmu->gp_counters;
> num_counters = pmu->nr_arch_gp_counters;
> }
> - if (idx >= num_counters)
> + if (idx >= num_counters || (fixed && !fixed_ctr_is_supported(pmu, idx)))
> return NULL;
> *mask &= pmu->counter_bitmask[fixed ? KVM_PMC_FIXED : KVM_PMC_GP];
> return &counters[array_index_nospec(idx, num_counters)];
> @@ -500,6 +506,9 @@ static void setup_fixed_pmc_eventsel(struct kvm_pmu *pmu)
> int i;
>
> for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
> + if (!fixed_ctr_is_supported(pmu, i))
> + continue;
> +
> pmc = &pmu->fixed_counters[i];
> event = fixed_pmc_events[array_index_nospec(i, size)];
> pmc->eventsel = (intel_arch_events[event].unit_mask << 8) |
> @@ -520,6 +529,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
>
> pmu->nr_arch_gp_counters = 0;
> pmu->nr_arch_fixed_counters = 0;
> + bitmap_zero(pmu->supported_fixed_pmc_idx, KVM_PMC_MAX_FIXED);
Side topic, isn't KVM missing a bitmap_zero() on all_valid_pmc_idx?
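I.e. something like (assuming the X86_PMC_IDX_MAX sizing from kvm_host.h):

	bitmap_zero(pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX);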
> pmu->counter_bitmask[KVM_PMC_GP] = 0;
> pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
> pmu->version = 0;
...
> + if (pmu->version > 1) {
> pmu->nr_arch_fixed_counters =
> - min3(ARRAY_SIZE(fixed_pmc_events),
> - (size_t) edx.split.num_counters_fixed,
> - (size_t)kvm_pmu_cap.num_counters_fixed);
> + min_t(int, ARRAY_SIZE(fixed_pmc_events),
> + kvm_pmu_cap.num_counters_fixed);
Leaving nr_arch_fixed_counters set to kvm_pmu_cap.num_counters_fixed seems odd.
E.g. if userspace reduces the number of counters to 1, shouldn't that be reflected
in the PMU metadata?
Ah, you did that so the iteration works. That's super confusing. And silly,
because in the end, pmu->nr_arch_fixed_counters is just kvm_pmu_cap.num_counters_fixed.
This holds true:

	BUILD_BUG_ON(ARRAY_SIZE(fixed_pmc_events) != KVM_PMC_MAX_FIXED);

and KVM does this:

	kvm_pmu_cap.num_counters_fixed = min(kvm_pmu_cap.num_counters_fixed,
					     KVM_PMC_MAX_FIXED);

ergo the above min_t() is a nop.
One option would be to just use kvm_pmu_cap.num_counters_fixed, but I think it
makes more sense to use for_each_set_bit(), or perhaps for_each_set_bitrange_from()
if all_valid_pmc_idx is used.
And then end up with something like so:
static bool intel_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	bool fixed = idx & (1u << 30);

	idx &= ~(3u << 30);

	return fixed ? fixed_ctr_is_supported(pmu, idx)
		     : idx < pmu->nr_arch_gp_counters;
}

static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
					      unsigned int idx, u64 *mask)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	bool fixed = idx & (1u << 30);
	struct kvm_pmc *counters;
	unsigned int num_counters;

	if (!intel_is_valid_rdpmc_ecx(vcpu, idx))
		return NULL;

	idx &= ~(3u << 30);

	if (fixed) {
		counters = pmu->fixed_counters;
		num_counters = kvm_pmu_cap.num_counters_fixed;
	} else {
		counters = pmu->gp_counters;
		num_counters = pmu->nr_arch_gp_counters;
	}

	*mask &= pmu->counter_bitmask[fixed ? KVM_PMC_FIXED : KVM_PMC_GP];
	return &counters[array_index_nospec(idx, num_counters)];
}
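And similarly for the eventsel setup, e.g. (untested sketch, assuming a
per-PMU bitmap of supported fixed counters sticks around in some form):

static void setup_fixed_pmc_eventsel(struct kvm_pmu *pmu)
{
	size_t size = ARRAY_SIZE(fixed_pmc_events);
	struct kvm_pmc *pmc;
	u32 event;
	int i;

	/* Program eventsels only for fixed counters the guest can actually use. */
	for_each_set_bit(i, pmu->supported_fixed_pmc_idx, KVM_PMC_MAX_FIXED) {
		pmc = &pmu->fixed_counters[i];
		event = fixed_pmc_events[array_index_nospec(i, size)];
		pmc->eventsel = (intel_arch_events[event].unit_mask << 8) |
				intel_arch_events[event].event_select;
	}
}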