Re: [PATCH v6 10/20] KVM: selftests: Test Intel PMU architectural events on gp counters

From: Jim Mattson
Date: Sat Nov 04 2023 - 09:29:42 EST


On Fri, Nov 3, 2023 at 5:03 PM Sean Christopherson <seanjc@xxxxxxxxxx> wrote:
>
> From: Jinrong Liang <cloudliang@xxxxxxxxxxx>
>
> Add test cases to verify that Intel's Architectural PMU events work as
> expected when they are (un)available according to guest CPUID. Iterate
> over a range of sane PMU versions, with and without full-width writes
> enabled, and over interesting combinations of lengths/masks for the bit
> vector that enumerates unavailable events.
>
> Test up to vPMU version 5, i.e. the current architectural max. KVM only
> officially supports up to version 2, but the behavior of the counters is
> backwards compatible, i.e. KVM shouldn't do something completely different
> for a higher, architecturally-defined vPMU version. Verify KVM behavior
> against the effective vPMU version, e.g. advertising vPMU 5 when KVM only
> supports vPMU 2 shouldn't magically unlock vPMU 5 features.
>
> According to Intel SDM, the number of architectural events is reported
> through CPUID.0AH:EAX[31:24] and the architectural event x is supported
> if EBX[x]=0 && EAX[31:24]>x. Note, KVM's ABI is that unavailable events
> do not count, even though strictly speaking that's not required by the
> SDM (the behavior is effectively undefined).
>
> Handcode the entirety of the measured section so that the test can
> precisely assert on the number of instructions and branches retired.
>
> Co-developed-by: Like Xu <likexu@xxxxxxxxxxx>
> Signed-off-by: Like Xu <likexu@xxxxxxxxxxx>
> Signed-off-by: Jinrong Liang <cloudliang@xxxxxxxxxxx>
> Co-developed-by: Sean Christopherson <seanjc@xxxxxxxxxx>
> Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
> ---
> tools/testing/selftests/kvm/Makefile | 1 +
> .../selftests/kvm/x86_64/pmu_counters_test.c | 321 ++++++++++++++++++
> 2 files changed, 322 insertions(+)
> create mode 100644 tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
>
> diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
> index 44d8d022b023..09f5d6fe84de 100644
> --- a/tools/testing/selftests/kvm/Makefile
> +++ b/tools/testing/selftests/kvm/Makefile
> @@ -91,6 +91,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
> TEST_GEN_PROGS_x86_64 += x86_64/monitor_mwait_test
> TEST_GEN_PROGS_x86_64 += x86_64/nested_exceptions_test
> TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
> +TEST_GEN_PROGS_x86_64 += x86_64/pmu_counters_test
> TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test
> TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id
> TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
> diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
> new file mode 100644
> index 000000000000..dd9a7864410c
> --- /dev/null
> +++ b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
> @@ -0,0 +1,321 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2023, Tencent, Inc.
> + */
> +
> +#define _GNU_SOURCE /* for program_invocation_short_name */
> +#include <x86intrin.h>
> +
> +#include "pmu.h"
> +#include "processor.h"
> +
> +/* Number of LOOP instructions for the guest measurement payload. */
> +#define NUM_BRANCHES 10
> +/*
> + * Number of "extra" instructions that will be counted, i.e. the number of
> + * instructions that are needed to set up the loop and then disable the
> + * counter. 2 MOV, 2 XOR, 1 WRMSR.
> + */
> +#define NUM_EXTRA_INSNS 5
> +#define NUM_INSNS_RETIRED (NUM_BRANCHES + NUM_EXTRA_INSNS)
> +
> +static uint8_t kvm_pmu_version;
> +static bool kvm_has_perf_caps;
> +
> +static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
> + void *guest_code,
> + uint8_t pmu_version,
> + uint64_t perf_capabilities)
> +{
> + struct kvm_vm *vm;
> +
> + vm = vm_create_with_one_vcpu(vcpu, guest_code);
> + vm_init_descriptor_tables(vm);
> + vcpu_init_descriptor_tables(*vcpu);
> +
> + sync_global_to_guest(vm, kvm_pmu_version);
> +
> + /*
> + * Set PERF_CAPABILITIES before PMU version as KVM disallows enabling
> + * features via PERF_CAPABILITIES if the guest doesn't have a vPMU.
> + */
> + if (kvm_has_perf_caps)
> + vcpu_set_msr(*vcpu, MSR_IA32_PERF_CAPABILITIES, perf_capabilities);
> +
> + vcpu_set_cpuid_property(*vcpu, X86_PROPERTY_PMU_VERSION, pmu_version);
> + return vm;
> +}
> +
> +static void run_vcpu(struct kvm_vcpu *vcpu)
> +{
> + struct ucall uc;
> +
> + do {
> + vcpu_run(vcpu);
> + switch (get_ucall(vcpu, &uc)) {
> + case UCALL_SYNC:
> + break;
> + case UCALL_ABORT:
> + REPORT_GUEST_ASSERT(uc);
> + break;
> + case UCALL_PRINTF:
> + pr_info("%s", uc.buffer);
> + break;
> + case UCALL_DONE:
> + break;
> + default:
> + TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
> + }
> + } while (uc.cmd != UCALL_DONE);
> +}
> +
> +static uint8_t guest_get_pmu_version(void)
> +{
> + /*
> + * Return the effective PMU version, i.e. the minimum between what KVM
> + * supports and what is enumerated to the guest. The host deliberately
> + * advertises a PMU version to the guest beyond what is actually
> + * supported by KVM to verify KVM doesn't freak out and do something
> + * bizarre with an architecturally valid, but unsupported, version.
> + */
> + return min_t(uint8_t, kvm_pmu_version, this_cpu_property(X86_PROPERTY_PMU_VERSION));
> +}
> +
> +/*
> + * If an architectural event is supported and guaranteed to generate at least
> + * one "hit", assert that its count is non-zero. If an event isn't supported or
> + * the test can't guarantee the associated action will occur, then all bets are
> + * off regarding the count, i.e. no checks can be done.
> + *
> + * Sanity check that in all cases, the event doesn't count when it's disabled,
> + * and that KVM correctly emulates the write of an arbitrary value.
> + */
> +static void guest_assert_event_count(uint8_t idx,
> + struct kvm_x86_pmu_feature event,
> + uint32_t pmc, uint32_t pmc_msr)
> +{
> + uint64_t count;
> +
> + count = _rdpmc(pmc);
> + if (!this_pmu_has(event))
> + goto sanity_checks;
> +
> + switch (idx) {
> + case INTEL_ARCH_INSTRUCTIONS_RETIRED:
> + GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
> + break;
> + case INTEL_ARCH_BRANCHES_RETIRED:
> + GUEST_ASSERT_EQ(count, NUM_BRANCHES);
> + break;
> + case INTEL_ARCH_CPU_CYCLES:
> + case INTEL_ARCH_REFERENCE_CYCLES:
> + GUEST_ASSERT_NE(count, 0);
> + break;
> + default:
> + break;
> + }
> +
> +sanity_checks:
> + __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
> + GUEST_ASSERT_EQ(_rdpmc(pmc), count);
> +
> + wrmsr(pmc_msr, 0xdead);
> + GUEST_ASSERT_EQ(_rdpmc(pmc), 0xdead);
> +}
> +
> +static void __guest_test_arch_event(uint8_t idx, struct kvm_x86_pmu_feature event,
> + uint32_t pmc, uint32_t pmc_msr,
> + uint32_t ctrl_msr, uint64_t ctrl_msr_value)
> +{
> + wrmsr(pmc_msr, 0);
> +
> + /*
> + * Enable and disable the PMC in a monolithic asm blob to ensure that
> + * the compiler can't insert _any_ code into the measured sequence.
> + * Note, ECX doesn't need to be clobbered as the input value, @ctrl_msr,
> + * is restored before the end of the sequence.
> + */
> + __asm__ __volatile__("wrmsr\n\t"
> + "mov $" __stringify(NUM_BRANCHES) ", %%ecx\n\t"
> + "loop .\n\t"
> + "mov %%edi, %%ecx\n\t"
> + "xor %%eax, %%eax\n\t"
> + "xor %%edx, %%edx\n\t"
> + "wrmsr\n\t"
> + :: "a"((uint32_t)ctrl_msr_value),
> + "d"(ctrl_msr_value >> 32),
> + "c"(ctrl_msr), "D"(ctrl_msr)
> + );
> +
> + guest_assert_event_count(idx, event, pmc, pmc_msr);
> +}
> +
> +static void guest_test_arch_event(uint8_t idx)
> +{
> + const struct {
> + struct kvm_x86_pmu_feature gp_event;
> + } intel_event_to_feature[] = {
> + [INTEL_ARCH_CPU_CYCLES] = { X86_PMU_FEATURE_CPU_CYCLES },
> + [INTEL_ARCH_INSTRUCTIONS_RETIRED] = { X86_PMU_FEATURE_INSNS_RETIRED },
> + [INTEL_ARCH_REFERENCE_CYCLES] = { X86_PMU_FEATURE_REFERENCE_CYCLES },
> + [INTEL_ARCH_LLC_REFERENCES] = { X86_PMU_FEATURE_LLC_REFERENCES },
> + [INTEL_ARCH_LLC_MISSES] = { X86_PMU_FEATURE_LLC_MISSES },
> + [INTEL_ARCH_BRANCHES_RETIRED] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED },
> + [INTEL_ARCH_BRANCHES_MISPREDICTED] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED },
> + };
> +
> + uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
> + uint32_t pmu_version = guest_get_pmu_version();
> + /* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */
> + bool guest_has_perf_global_ctrl = pmu_version >= 2;
> + struct kvm_x86_pmu_feature gp_event;
> + uint32_t base_pmc_msr;
> + unsigned int i;
> +
> + /* The host side shouldn't invoke this without a guest PMU. */
> + GUEST_ASSERT(pmu_version);
> +
> + if (this_cpu_has(X86_FEATURE_PDCM) &&
> + rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
> + base_pmc_msr = MSR_IA32_PMC0;
> + else
> + base_pmc_msr = MSR_IA32_PERFCTR0;
> +
> + gp_event = intel_event_to_feature[idx].gp_event;
> + GUEST_ASSERT_EQ(idx, gp_event.f.bit);
> +
> + GUEST_ASSERT(nr_gp_counters);
> +
> + for (i = 0; i < nr_gp_counters; i++) {
> + uint64_t eventsel = ARCH_PERFMON_EVENTSEL_OS |
> + ARCH_PERFMON_EVENTSEL_ENABLE |
> + intel_pmu_arch_events[idx];
> +
> + wrmsr(MSR_P6_EVNTSEL0 + i, 0);
> + if (guest_has_perf_global_ctrl)
> + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i));
> +
> + __guest_test_arch_event(idx, gp_event, i, base_pmc_msr + i,
> + MSR_P6_EVNTSEL0 + i, eventsel);
> + }
> +}
> +
> +static void guest_test_arch_events(void)
> +{
> + uint8_t i;
> +
> + for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++)
> + guest_test_arch_event(i);
> +
> + GUEST_DONE();
> +}
> +
> +static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
> + uint8_t length, uint32_t unavailable_mask)
> +{
> + struct kvm_vcpu *vcpu;
> + struct kvm_vm *vm;
> +
> + /* Testing arch events requires a vPMU (there are no negative tests). */
> + if (!pmu_version)
> + return;
> +
> + vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_arch_events,
> + pmu_version, perf_capabilities);
> +
> + vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH,
> + length);
> + vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EVENTS_MASK,
> + unavailable_mask);
> +
> + run_vcpu(vcpu);
> +
> + kvm_vm_free(vm);
> +}
> +
> +static void test_intel_counters(void)
> +{
> + uint8_t nr_arch_events = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
> + uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
> + unsigned int i;
> + uint8_t v, j;
> + uint32_t k;
> +
> + const uint64_t perf_caps[] = {
> + 0,
> + PMU_CAP_FW_WRITES,
> + };
> +
> + /*
> + * Test up to PMU v5, which is the current maximum version defined by
> + * Intel, i.e. is the last version that is guaranteed to be backwards
> + * compatible with KVM's existing behavior.
> + */
> + uint8_t max_pmu_version = max_t(typeof(pmu_version), pmu_version, 5);
> +
> + /*
> + * Verify that KVM is sanitizing the architectural events, i.e. hiding
> + * events that KVM doesn't support. This will fail any time KVM adds
> + * support for a new event, but it's worth paying that price to be able
> + * to detect KVM bugs.
> + */
> + TEST_ASSERT(nr_arch_events <= NR_INTEL_ARCH_EVENTS,
> + "KVM is either buggy, or has learned new tricks (length = %u, mask = %x)",
> + nr_arch_events, kvm_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));

As stated earlier in this series, KVM doesn't have to do anything when
a new architectural event is defined, so this should just say
something like, "New architectural event(s); please update this
test."

> + /*
> + * Force iterating over known arch events regardless of whether or not
> + * KVM/hardware supports a given event.
> + */
> + nr_arch_events = max_t(typeof(nr_arch_events), nr_arch_events, NR_INTEL_ARCH_EVENTS);
> +
> + for (v = 0; v <= max_pmu_version; v++) {
> + for (i = 0; i < ARRAY_SIZE(perf_caps); i++) {
> + if (!kvm_has_perf_caps && perf_caps[i])
> + continue;
> +
> + pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n",
> + v, perf_caps[i]);
> + /*
> + * To keep the total runtime reasonable, test every
> + * possible non-zero, non-reserved bitmap combination
> + * only with the native PMU version and the full bit
> + * vector length.
> + */
> + if (v == pmu_version) {
> + for (k = 1; k < (BIT(nr_arch_events) - 1); k++)
> + test_arch_events(v, perf_caps[i], nr_arch_events, k);
> + }
> + /*
> + * Test single bits for all PMU versions and lengths up
> + * to the number of events + 1 (to verify KVM doesn't do
> + * weird things if the guest length is greater than the
> + * host length). Explicitly test a mask of '0' and all
> + * ones i.e. all events being available and unavailable.
> + */
> + for (j = 0; j <= nr_arch_events + 1; j++) {
> + test_arch_events(v, perf_caps[i], j, 0);
> + test_arch_events(v, perf_caps[i], j, -1u);
> +
> + for (k = 0; k < nr_arch_events; k++)
> + test_arch_events(v, perf_caps[i], j, BIT(k));
> + }
> + }
> + }
> +}
> +
> +int main(int argc, char *argv[])
> +{
> + TEST_REQUIRE(get_kvm_param_bool("enable_pmu"));
> +
> + TEST_REQUIRE(host_cpu_is_intel);
> + TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
> + TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);
> +
> + kvm_pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
> + kvm_has_perf_caps = kvm_cpu_has(X86_FEATURE_PDCM);
> +
> + test_intel_counters();
> +
> + return 0;
> +}
> --
> 2.42.0.869.gea05f2083d-goog
>