Re: [PATCH] KVM: x86/pmu: Drop event_type and rename "struct kvm_event_hw_type_mapping"

From: Sean Christopherson
Date: Thu Jan 26 2023 - 17:10:03 EST


On Mon, Dec 05, 2022, Like Xu wrote:
> From: Like Xu <likexu@xxxxxxxxxxx>
>
> After commit ("02791a5c362b KVM: x86/pmu: Use PERF_TYPE_RAW
> to merge reprogram_{gp,fixed}counter()"), vPMU starts to directly
> use the hardware event eventsel and unit_mask to reprogram perf_event,
> and the event_type field in the "struct kvm_event_hw_type_mapping"
> is simply no longer being used.
>
> After discarding this field, the name of the structure also lost
> its mapping semantics, renaming it "struct kvm_pmu_hw_event" and
> reorganizing the comments to continue to help newcomers.
>
> Signed-off-by: Like Xu <likexu@xxxxxxxxxxx>
> ---
> arch/x86/kvm/pmu.h | 3 +--
> arch/x86/kvm/vmx/pmu_intel.c | 34 +++++++++++++++++++++++++---------
> 2 files changed, 26 insertions(+), 11 deletions(-)
>
> diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
> index 85ff3c0588ba..2aef09eafb70 100644
> --- a/arch/x86/kvm/pmu.h
> +++ b/arch/x86/kvm/pmu.h
> @@ -18,10 +18,9 @@
> #define VMWARE_BACKDOOR_PMC_REAL_TIME 0x10001
> #define VMWARE_BACKDOOR_PMC_APPARENT_TIME 0x10002
>
> -struct kvm_event_hw_type_mapping {
> +struct kvm_pmu_hw_event {

The only user of the struct is the array, just make it anonymous struct and
constify the array. No need to send a v2, I'll fixup when applying.

> u8 eventsel;
> u8 unit_mask;
> - unsigned event_type;
> };
>
> struct kvm_pmu_ops {
> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> index 28b0a784f6e9..d34e9f85bdce 100644
> --- a/arch/x86/kvm/vmx/pmu_intel.c
> +++ b/arch/x86/kvm/vmx/pmu_intel.c
> @@ -20,16 +20,32 @@
>
> #define MSR_PMC_FULL_WIDTH_BIT (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)
>
> -static struct kvm_event_hw_type_mapping intel_arch_events[] = {
> - [0] = { 0x3c, 0x00, PERF_COUNT_HW_CPU_CYCLES },
> - [1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS },
> - [2] = { 0x3c, 0x01, PERF_COUNT_HW_BUS_CYCLES },
> - [3] = { 0x2e, 0x4f, PERF_COUNT_HW_CACHE_REFERENCES },
> - [4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES },
> - [5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
> - [6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
> +/*
> + * The first part of hw_events in the following array represent Intel's
> + * Pre-defined Architectural Performance Events in an ordered manner:
> + *
> + * 0 - PERF_COUNT_HW_CPU_CYCLES
> + * 1 - PERF_COUNT_HW_INSTRUCTIONS
> + * 2 - PERF_COUNT_HW_BUS_CYCLES
> + * 3 - PERF_COUNT_HW_CACHE_REFERENCES
> + * 4 - PERF_COUNT_HW_CACHE_MISSES
> + * 5 - PERF_COUNT_HW_BRANCH_INSTRUCTIONS
> + * 6 - PERF_COUNT_HW_BRANCH_MISSES
> + *
> + * the second part of hw_events is defined by the generic kernel perf:
> + *
> + * 7 - PERF_COUNT_HW_REF_CPU_CYCLES
> + */
> +static struct kvm_pmu_hw_event intel_arch_events[] = {
> + [0] = { 0x3c, 0x00 },

*sigh*

This made me actually look at all the code, and it just about broke my WTF-o-meter.
The fragility of this code is mind-bogglingly unnecessary. Instead of adding a
comment to document magic numbers, express that information in code.

E.g. there's zero excuses for us to have code like this

static int fixed_pmc_events[] = {1, 0, 7};

and this

/* disable event that reported as not present by cpuid */
if ((i < 7) && !(pmu->available_event_types & (1 << i)))
return false;

I'll post patches to clean this up so that we have the following, which is (a)
self-documenting and (b) won't break or require updating magic numbers when the
next architectural event comes along.


enum intel_pmu_architectural_events {
/*
* The order of the architectural events matters as support for each
* event is enumerated via CPUID using the index of the event.
*/
INTEL_ARCH_CPU_CYCLES,
INTEL_ARCH_INSTRUCTIONS_RETIRED,
INTEL_ARCH_REFERENCE_CYCLES,
INTEL_ARCH_LLC_REFERNCES,
INTEL_ARCH_LLC_MISSES,
INTEL_ARCH_BRANCHES_RETIRED,
INTEL_ARCH_BRANCHES_MISPREDICTED,

NR_INTEL_ARCH_EVENTS,

/*
* Pseudo-architectural event used to implement IA32_FIXED_CTR2, a.k.a.
* TSC reference cycles. The architectural reference cycles event may
* or may not actually use the TSC as the reference, e.g. might use the
* core crystal clock or the bus clock (yeah, "architectural").
*/
PSEUDO_ARCH_REFERENCE_CYCLES = NR_INTEL_ARCH_EVENTS,
};

static struct {
u8 eventsel;
u8 unit_mask;
} const intel_arch_events[] = {
[INTEL_ARCH_CPU_CYCLES] = { 0x3c, 0x00 },
[INTEL_ARCH_INSTRUCTIONS_RETIRED] = { 0xc0, 0x00 },
[INTEL_ARCH_REFERENCE_CYCLES] = { 0x3c, 0x01 },
[INTEL_ARCH_LLC_REFERNCES] = { 0x2e, 0x4f },
[INTEL_ARCH_LLC_MISSES] = { 0x2e, 0x41 },
[INTEL_ARCH_BRANCHES_RETIRED] = { 0xc4, 0x00 },
[INTEL_ARCH_BRANCHES_MISPREDICTED] = { 0xc5, 0x00 },
[PSEUDO_ARCH_REFERENCE_CYCLES] = { 0x00, 0x03 },
};

/* mapping between fixed pmc index and intel_arch_events array */
static int fixed_pmc_events[] = {
[0] = INTEL_ARCH_INSTRUCTIONS_RETIRED,
[1] = INTEL_ARCH_CPU_CYCLES,
[2] = PSEUDO_ARCH_REFERENCE_CYCLES,
};