Re: [PATCH v4 2/7] arm64: KVM: Use shared area to pass PMU event state to hypervisor

From: James Clark
Date: Thu Feb 01 2024 - 11:17:26 EST




On 04/01/2024 16:27, James Clark wrote:
> Currently the state of the PMU events is copied into the VCPU struct
> before every VCPU run. This isn't scalable if more data for other
> features needs to be added too. So make a writable area that's shared
> between the host and the hypervisor to store this state.
>
> Normal per-cpu constructs can't be used because although the framework
> exists for the host to write to the hypervisor's per-cpu structs, this
> only works until the protection is enabled. And for the other way
> around, no framework exists for the hypervisor to access the host's size
> and layout of per-cpu data. Instead of making a new framework for the
> hypervisor to access the host's per-cpu data that would only be used
> once, just define the new shared area as an array with NR_CPUS elements.
> This also reduces the amount of sharing that needs to be done, because
> unlike this array, the per-cpu data isn't contiguous.
>
> Signed-off-by: James Clark <james.clark@xxxxxxx>

Hi Marc,

Do you have any feedback about whether this what you were thinking of in
your comment here:
https://lore.kernel.org/kvmarm/86msuqb84g.wl-maz@xxxxxxxxxx/

Thanks
James

> ---
> arch/arm64/include/asm/kvm_host.h | 8 ++++++++
> arch/arm64/kernel/image-vars.h | 1 +
> arch/arm64/kvm/arm.c | 16 ++++++++++++++--
> arch/arm64/kvm/hyp/nvhe/setup.c | 11 +++++++++++
> arch/arm64/kvm/hyp/nvhe/switch.c | 9 +++++++--
> arch/arm64/kvm/pmu.c | 4 +---
> include/kvm/arm_pmu.h | 17 -----------------
> 7 files changed, 42 insertions(+), 24 deletions(-)
>
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 824f29f04916..93d38ad257ed 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -466,6 +466,14 @@ struct kvm_cpu_context {
> struct kvm_vcpu *__hyp_running_vcpu;
> };
>
> +struct kvm_host_global_state {
> + struct kvm_pmu_events {
> + u32 events_host;
> + u32 events_guest;
> + } pmu_events;
> +} ____cacheline_aligned;
> +extern struct kvm_host_global_state kvm_host_global_state[NR_CPUS];
> +
> struct kvm_host_data {
> struct kvm_cpu_context host_ctxt;
> };
> diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
> index 119ca121b5f8..1a9dbb02bb4a 100644
> --- a/arch/arm64/kernel/image-vars.h
> +++ b/arch/arm64/kernel/image-vars.h
> @@ -59,6 +59,7 @@ KVM_NVHE_ALIAS(alt_cb_patch_nops);
>
> /* Global kernel state accessed by nVHE hyp code. */
> KVM_NVHE_ALIAS(kvm_vgic_global_state);
> +KVM_NVHE_ALIAS(kvm_host_global_state);
>
> /* Kernel symbols used to call panic() from nVHE hyp code (via ERET). */
> KVM_NVHE_ALIAS(nvhe_hyp_panic_handler);
> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> index 4796104c4471..bd6b2eda5f4f 100644
> --- a/arch/arm64/kvm/arm.c
> +++ b/arch/arm64/kvm/arm.c
> @@ -47,6 +47,20 @@
>
> static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT;
>
> +/*
> + * Host state that isn't associated with any VCPU, but will affect any VCPU
> + * running on a host CPU in the future. This remains writable from the host and
> + * readable in the hyp.
> + *
> + * PER_CPU constructs aren't compatible between the hypervisor and the host so
> + * just define it as a NR_CPUS array. DECLARE_KVM_NVHE_PER_CPU works in both
> + * places, but not after the hypervisor protection is initialised. After that,
> + * kvm_arm_hyp_percpu_base isn't accessible from the host, so even if the
> + * kvm_host_global_state struct was shared with the host, the per-cpu offset
> + * can't be calculated without sharing even more data with the host.
> + */
> +struct kvm_host_global_state kvm_host_global_state[NR_CPUS];
> +
> DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector);
>
> DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
> @@ -1016,8 +1030,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
>
> kvm_vgic_flush_hwstate(vcpu);
>
> - kvm_pmu_update_vcpu_events(vcpu);
> -
> /*
> * Ensure we set mode to IN_GUEST_MODE after we disable
> * interrupts and before the final VCPU requests check.
> diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
> index b5452e58c49a..3e45cc10ba96 100644
> --- a/arch/arm64/kvm/hyp/nvhe/setup.c
> +++ b/arch/arm64/kvm/hyp/nvhe/setup.c
> @@ -159,6 +159,17 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
> if (ret)
> return ret;
>
> + /*
> + * Similar to kvm_vgic_global_state, but this one remains writable by
> + * the host rather than read-only. Used to store per-cpu state about the
> + * host that isn't associated with any particular VCPU.
> + */
> + prot = pkvm_mkstate(KVM_PGTABLE_PROT_RW, PKVM_PAGE_SHARED_OWNED);
> + ret = pkvm_create_mappings(&kvm_host_global_state,
> + &kvm_host_global_state + 1, prot);
> + if (ret)
> + return ret;
> +
> ret = create_hyp_debug_uart_mapping();
> if (ret)
> return ret;
> diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
> index c50f8459e4fc..89147a9dc38c 100644
> --- a/arch/arm64/kvm/hyp/nvhe/switch.c
> +++ b/arch/arm64/kvm/hyp/nvhe/switch.c
> @@ -130,13 +130,18 @@ static void __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
> }
> }
>
> +static struct kvm_pmu_events *kvm_nvhe_get_pmu_events(struct kvm_vcpu *vcpu)
> +{
> + return &kvm_host_global_state[vcpu->cpu].pmu_events;
> +}
> +
> /*
> * Disable host events, enable guest events
> */
> #ifdef CONFIG_HW_PERF_EVENTS
> static bool __pmu_switch_to_guest(struct kvm_vcpu *vcpu)
> {
> - struct kvm_pmu_events *pmu = &vcpu->arch.pmu.events;
> + struct kvm_pmu_events *pmu = kvm_nvhe_get_pmu_events(vcpu);
>
> if (pmu->events_host)
> write_sysreg(pmu->events_host, pmcntenclr_el0);
> @@ -152,7 +157,7 @@ static bool __pmu_switch_to_guest(struct kvm_vcpu *vcpu)
> */
> static void __pmu_switch_to_host(struct kvm_vcpu *vcpu)
> {
> - struct kvm_pmu_events *pmu = &vcpu->arch.pmu.events;
> + struct kvm_pmu_events *pmu = kvm_nvhe_get_pmu_events(vcpu);
>
> if (pmu->events_guest)
> write_sysreg(pmu->events_guest, pmcntenclr_el0);
> diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c
> index a243934c5568..136d5c6c1916 100644
> --- a/arch/arm64/kvm/pmu.c
> +++ b/arch/arm64/kvm/pmu.c
> @@ -6,8 +6,6 @@
> #include <linux/kvm_host.h>
> #include <linux/perf_event.h>
>
> -static DEFINE_PER_CPU(struct kvm_pmu_events, kvm_pmu_events);
> -
> /*
> * Given the perf event attributes and system type, determine
> * if we are going to need to switch counters at guest entry/exit.
> @@ -28,7 +26,7 @@ static bool kvm_pmu_switch_needed(struct perf_event_attr *attr)
>
> struct kvm_pmu_events *kvm_get_pmu_events(void)
> {
> - return this_cpu_ptr(&kvm_pmu_events);
> + return &kvm_host_global_state[smp_processor_id()].pmu_events;
> }
>
> /*
> diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
> index 4b9d8fb393a8..71a835970ab5 100644
> --- a/include/kvm/arm_pmu.h
> +++ b/include/kvm/arm_pmu.h
> @@ -18,14 +18,8 @@ struct kvm_pmc {
> struct perf_event *perf_event;
> };
>
> -struct kvm_pmu_events {
> - u32 events_host;
> - u32 events_guest;
> -};
> -
> struct kvm_pmu {
> struct irq_work overflow_work;
> - struct kvm_pmu_events events;
> struct kvm_pmc pmc[ARMV8_PMU_MAX_COUNTERS];
> int irq_num;
> bool created;
> @@ -79,17 +73,6 @@ void kvm_vcpu_pmu_resync_el0(void);
> #define kvm_vcpu_has_pmu(vcpu) \
> (vcpu_has_feature(vcpu, KVM_ARM_VCPU_PMU_V3))
>
> -/*
> - * Updates the vcpu's view of the pmu events for this cpu.
> - * Must be called before every vcpu run after disabling interrupts, to ensure
> - * that an interrupt cannot fire and update the structure.
> - */
> -#define kvm_pmu_update_vcpu_events(vcpu) \
> - do { \
> - if (!has_vhe() && kvm_vcpu_has_pmu(vcpu)) \
> - vcpu->arch.pmu.events = *kvm_get_pmu_events(); \
> - } while (0)
> -
> /*
> * Evaluates as true when emulating PMUv3p5, and false otherwise.
> */