Re: [PATCH v3 4/7] KVM: SVM: hyper-v: Nested enlightenments in VMCB

From: Tom Lendacky
Date: Fri Apr 23 2021 - 10:56:23 EST


On 4/21/21 9:06 AM, Vineeth Pillai wrote:
> Add Hyper-V specific fields in VMCB to support SVM enlightenments.
> Also a small refactoring of VMCB clean bits handling.
>
> Signed-off-by: Vineeth Pillai <viremana@xxxxxxxxxxxxxxxxxxx>
> ---
> arch/x86/include/asm/svm.h | 24 +++++++++++++++++++++++-
> arch/x86/kvm/svm/svm.c | 8 ++++++++
> arch/x86/kvm/svm/svm.h | 12 +++++++++++-
> 3 files changed, 42 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
> index 1c561945b426..3586d7523ce8 100644
> --- a/arch/x86/include/asm/svm.h
> +++ b/arch/x86/include/asm/svm.h
> @@ -322,9 +322,31 @@ static inline void __unused_size_checks(void)
> BUILD_BUG_ON(sizeof(struct ghcb) != EXPECTED_GHCB_SIZE);
> }
>
> +
> +#if IS_ENABLED(CONFIG_HYPERV)
> +struct __packed hv_enlightenments {
> + struct __packed hv_enlightenments_control {
> + u32 nested_flush_hypercall:1;
> + u32 msr_bitmap:1;
> + u32 enlightened_npt_tlb: 1;
> + u32 reserved:29;
> + } hv_enlightenments_control;
> + u32 hv_vp_id;
> + u64 hv_vm_id;
> + u64 partition_assist_page;
> + u64 reserved;
> +};
> +#define VMCB_CONTROL_END 992 // 32 bytes for Hyper-V
> +#else
> +#define VMCB_CONTROL_END 1024
> +#endif
> +
> struct vmcb {
> struct vmcb_control_area control;
> - u8 reserved_control[1024 - sizeof(struct vmcb_control_area)];
> + u8 reserved_control[VMCB_CONTROL_END - sizeof(struct vmcb_control_area)];
> +#if IS_ENABLED(CONFIG_HYPERV)
> + struct hv_enlightenments hv_enlightenments;
> +#endif

I believe the 32 bytes at the end of the VMCB control area will be for use
by any software/hypervisor. The APM update that documents this change,
along with clean bit 31, isn't public, yet, but should be in a month or so
(from what I was told).

So these fields should be added generically and then your code should make
use of the generic field mapped with your structure.

To my knowledge (until the APM is public and documents everything), I
believe the following will be in place:

VMCB offset 0x3e0 - 0x3ff is reserved for software
Clean bit 31 is reserved for software
SVM intercept exit code 0xf0000000 is reserved for software

> struct vmcb_save_area save;
> } __packed;
>
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index baee91c1e936..9a241a0806cd 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -31,6 +31,7 @@
> #include <asm/tlbflush.h>
> #include <asm/desc.h>
> #include <asm/debugreg.h>
> +#include <asm/hypervisor.h>
> #include <asm/kvm_para.h>
> #include <asm/irq_remapping.h>
> #include <asm/spec-ctrl.h>
> @@ -122,6 +123,8 @@ bool npt_enabled = true;
> bool npt_enabled;
> #endif
>
> +u32 __read_mostly vmcb_all_clean_mask = ((1 << VMCB_DIRTY_MAX) - 1);
> +
> /*
> * These 2 parameters are used to config the controls for Pause-Loop Exiting:
> * pause_filter_count: On processors that support Pause filtering(indicated
> @@ -1051,6 +1054,11 @@ static __init int svm_hardware_setup(void)
> */
> allow_smaller_maxphyaddr = !npt_enabled;
>
> +#if IS_ENABLED(CONFIG_HYPERV)
> + if (hypervisor_is_type(X86_HYPER_MS_HYPERV))
> + vmcb_all_clean_mask |= BIT(VMCB_HV_NESTED_ENLIGHTENMENTS);
> +#endif
> +

Is there any way to hide all the #if's in this and the other patches so
that the .c files are littered with the #if IS_ENABLED() lines. Put them
in svm.h or a new svm-hv.h file?

> return 0;
>
> err:
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index 39e071fdab0c..ff0a70bd7fce 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -51,11 +51,16 @@ enum {
> * AVIC LOGICAL_TABLE pointer
> */
> VMCB_DIRTY_MAX,
> +#if IS_ENABLED(CONFIG_HYPERV)
> + VMCB_HV_NESTED_ENLIGHTENMENTS = 31,
> +#endif

Again, this should be generic.

Thanks,
Tom

> };
>
> /* TPR and CR2 are always written before VMRUN */
> #define VMCB_ALWAYS_DIRTY_MASK ((1U << VMCB_INTR) | (1U << VMCB_CR2))
>
> +extern u32 vmcb_all_clean_mask __read_mostly;
> +
> struct kvm_sev_info {
> bool active; /* SEV enabled guest */
> bool es_active; /* SEV-ES enabled guest */
> @@ -230,7 +235,7 @@ static inline void vmcb_mark_all_dirty(struct vmcb *vmcb)
>
> static inline void vmcb_mark_all_clean(struct vmcb *vmcb)
> {
> - vmcb->control.clean = ((1 << VMCB_DIRTY_MAX) - 1)
> + vmcb->control.clean = vmcb_all_clean_mask
> & ~VMCB_ALWAYS_DIRTY_MASK;
> }
>
> @@ -239,6 +244,11 @@ static inline void vmcb_mark_dirty(struct vmcb *vmcb, int bit)
> vmcb->control.clean &= ~(1 << bit);
> }
>
> +static inline bool vmcb_is_clean(struct vmcb *vmcb, int bit)
> +{
> + return (vmcb->control.clean & (1 << bit));
> +}
> +
> static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
> {
> return container_of(vcpu, struct vcpu_svm, vcpu);
>