Re: [PATCH v2 4/7] KVM: SVM: hyper-v: Nested enlightenments in VMCB

From: Vitaly Kuznetsov
Date: Fri Apr 16 2021 - 04:58:36 EST


Vineeth Pillai <viremana@xxxxxxxxxxxxxxxxxxx> writes:

> Add Hyper-V specific fields in VMCB to support SVM enlightenments.
> Also a small refactoring of VMCB clean bits handling.
>
> Signed-off-by: Vineeth Pillai <viremana@xxxxxxxxxxxxxxxxxxx>
> ---
> arch/x86/include/asm/svm.h | 24 +++++++++++++++++++++++-
> arch/x86/kvm/svm/svm.c | 8 ++++++++
> arch/x86/kvm/svm/svm.h | 30 ++++++++++++++++++++++++++++--
> 3 files changed, 59 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
> index 1c561945b426..3586d7523ce8 100644
> --- a/arch/x86/include/asm/svm.h
> +++ b/arch/x86/include/asm/svm.h
> @@ -322,9 +322,31 @@ static inline void __unused_size_checks(void)
> BUILD_BUG_ON(sizeof(struct ghcb) != EXPECTED_GHCB_SIZE);
> }
>
> +
> +#if IS_ENABLED(CONFIG_HYPERV)
> +struct __packed hv_enlightenments {
> + struct __packed hv_enlightenments_control {
> + u32 nested_flush_hypercall:1;
> + u32 msr_bitmap:1;
> + u32 enlightened_npt_tlb: 1;
> + u32 reserved:29;
> + } hv_enlightenments_control;
> + u32 hv_vp_id;
> + u64 hv_vm_id;
> + u64 partition_assist_page;
> + u64 reserved;
> +};

Enlightened VMCS seems to have the same part:

struct {
u32 nested_flush_hypercall:1;
u32 msr_bitmap:1;
u32 reserved:30;
} __packed hv_enlightenments_control;
u32 hv_vp_id;
u64 hv_vm_id;
u64 partition_assist_page;

Would it maybe make sense to unify these two (provided they are the same
thing in Hyper-V, of course)?


> +#define VMCB_CONTROL_END 992 // 32 bytes for Hyper-V
> +#else
> +#define VMCB_CONTROL_END 1024
> +#endif
> +
> struct vmcb {
> struct vmcb_control_area control;
> - u8 reserved_control[1024 - sizeof(struct vmcb_control_area)];
> + u8 reserved_control[VMCB_CONTROL_END - sizeof(struct vmcb_control_area)];
> +#if IS_ENABLED(CONFIG_HYPERV)
> + struct hv_enlightenments hv_enlightenments;
> +#endif
> struct vmcb_save_area save;
> } __packed;
>
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index baee91c1e936..2ad1f55c88d0 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -31,6 +31,7 @@
> #include <asm/tlbflush.h>
> #include <asm/desc.h>
> #include <asm/debugreg.h>
> +#include <asm/hypervisor.h>
> #include <asm/kvm_para.h>
> #include <asm/irq_remapping.h>
> #include <asm/spec-ctrl.h>
> @@ -122,6 +123,8 @@ bool npt_enabled = true;
> bool npt_enabled;
> #endif
>
> +u32 __read_mostly vmcb_all_clean_mask = VMCB_ALL_CLEAN_MASK;
> +
> /*
> * These 2 parameters are used to config the controls for Pause-Loop Exiting:
> * pause_filter_count: On processors that support Pause filtering(indicated
> @@ -1051,6 +1054,11 @@ static __init int svm_hardware_setup(void)
> */
> allow_smaller_maxphyaddr = !npt_enabled;
>
> +#if IS_ENABLED(CONFIG_HYPERV)
> + if (hypervisor_is_type(X86_HYPER_MS_HYPERV))
> + vmcb_all_clean_mask |= VMCB_HYPERV_CLEAN_MASK;
> +#endif
> +
> return 0;
>
> err:
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index 39e071fdab0c..63ed05c8027b 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -33,6 +33,11 @@ static const u32 host_save_user_msrs[] = {
> extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
> extern bool npt_enabled;
>
> +/*
> + * Clean bits in VMCB.
> + * VMCB_ALL_CLEAN_MASK and VMCB_HYPERV_CLEAN_MASK might
> + * also need to be updated if this enum is modified.
> + */
> enum {
> VMCB_INTERCEPTS, /* Intercept vectors, TSC offset,
> pause filter count */
> @@ -50,12 +55,28 @@ enum {
> * AVIC PHYSICAL_TABLE pointer,
> * AVIC LOGICAL_TABLE pointer
> */
> - VMCB_DIRTY_MAX,
> +#if IS_ENABLED(CONFIG_HYPERV)
> + VMCB_HV_NESTED_ENLIGHTENMENTS = 31,
> +#endif
> };
>
> +#define VMCB_ALL_CLEAN_MASK ( \
> + (1U << VMCB_INTERCEPTS) | (1U << VMCB_PERM_MAP) | \
> + (1U << VMCB_ASID) | (1U << VMCB_INTR) | \
> + (1U << VMCB_NPT) | (1U << VMCB_CR) | (1U << VMCB_DR) | \
> + (1U << VMCB_DT) | (1U << VMCB_SEG) | (1U << VMCB_CR2) | \
> + (1U << VMCB_LBR) | (1U << VMCB_AVIC) \
> + )

What if we preserve VMCB_DIRTY_MAX and drop this newly introduced
VMCB_ALL_CLEAN_MASK (which basically lists all the members of the enum
above)? '(1 << VMCB_DIRTY_MAX) - 1' can still work. (If the 'VMCB_DIRTY_MAX'
name becomes misleading, we can e.g. rename it to VMCB_NATIVE_DIRTY_MAX
or something, but I'm not sure it's worth it.)

> +
> +#if IS_ENABLED(CONFIG_HYPERV)
> +#define VMCB_HYPERV_CLEAN_MASK (1U << VMCB_HV_NESTED_ENLIGHTENMENTS)
> +#endif

VMCB_HYPERV_CLEAN_MASK is a single bit; why do we need it at all?
(BIT(VMCB_HV_NESTED_ENLIGHTENMENTS) is not super long.)

> +
> /* TPR and CR2 are always written before VMRUN */
> #define VMCB_ALWAYS_DIRTY_MASK ((1U << VMCB_INTR) | (1U << VMCB_CR2))
>
> +extern u32 vmcb_all_clean_mask __read_mostly;
> +
> struct kvm_sev_info {
> bool active; /* SEV enabled guest */
> bool es_active; /* SEV-ES enabled guest */
> @@ -230,7 +251,7 @@ static inline void vmcb_mark_all_dirty(struct vmcb *vmcb)
>
> static inline void vmcb_mark_all_clean(struct vmcb *vmcb)
> {
> - vmcb->control.clean = ((1 << VMCB_DIRTY_MAX) - 1)
> + vmcb->control.clean = vmcb_all_clean_mask
> & ~VMCB_ALWAYS_DIRTY_MASK;
> }
>
> @@ -239,6 +260,11 @@ static inline void vmcb_mark_dirty(struct vmcb *vmcb, int bit)
> vmcb->control.clean &= ~(1 << bit);
> }
>
> +static inline bool vmcb_is_clean(struct vmcb *vmcb, int bit)
> +{
> + return (vmcb->control.clean & (1 << bit));
> +}
> +
> static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
> {
> return container_of(vcpu, struct vcpu_svm, vcpu);

--
Vitaly