Re: [PATCH 3/3 v2] KVM: nVMX: Emulate EPTP switching for the L1 hypervisor
From: Paolo Bonzini
Date: Fri Jul 07 2017 - 04:30:59 EST
On 07/07/2017 01:03, Bandan Das wrote:
> When L2 uses vmfunc, L0 utilizes the associated vmexit to
> emulate a switching of the ept pointer by reloading the
> guest MMU.
>
> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> Signed-off-by: Bandan Das <bsd@xxxxxxxxxx>
> ---
> arch/x86/include/asm/vmx.h | 6 +++++
> arch/x86/kvm/vmx.c | 55 +++++++++++++++++++++++++++++++++++++++++++---
> 2 files changed, 58 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index da5375e..5f63a2e 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -115,6 +115,10 @@
> #define VMX_MISC_SAVE_EFER_LMA 0x00000020
> #define VMX_MISC_ACTIVITY_HLT 0x00000040
>
> +/* VMFUNC functions */
> +#define VMX_VMFUNC_EPTP_SWITCHING 0x00000001
> +#define VMFUNC_EPTP_ENTRIES 512
> +
> static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic)
> {
> return vmx_basic & GENMASK_ULL(30, 0);
> @@ -200,6 +204,8 @@ enum vmcs_field {
> EOI_EXIT_BITMAP2_HIGH = 0x00002021,
> EOI_EXIT_BITMAP3 = 0x00002022,
> EOI_EXIT_BITMAP3_HIGH = 0x00002023,
> + EPTP_LIST_ADDRESS = 0x00002024,
> + EPTP_LIST_ADDRESS_HIGH = 0x00002025,
> VMREAD_BITMAP = 0x00002026,
> VMWRITE_BITMAP = 0x00002028,
> XSS_EXIT_BITMAP = 0x0000202C,
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 7364678..3a4aa68 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -246,6 +246,7 @@ struct __packed vmcs12 {
> u64 eoi_exit_bitmap1;
> u64 eoi_exit_bitmap2;
> u64 eoi_exit_bitmap3;
> + u64 eptp_list_address;
> u64 xss_exit_bitmap;
> u64 guest_physical_address;
> u64 vmcs_link_pointer;
> @@ -771,6 +772,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
> FIELD64(EOI_EXIT_BITMAP1, eoi_exit_bitmap1),
> FIELD64(EOI_EXIT_BITMAP2, eoi_exit_bitmap2),
> FIELD64(EOI_EXIT_BITMAP3, eoi_exit_bitmap3),
> + FIELD64(EPTP_LIST_ADDRESS, eptp_list_address),
> FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
> FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
> FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
> @@ -1402,6 +1404,13 @@ static inline bool nested_cpu_has_vmfunc(struct vmcs12 *vmcs12)
> return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VMFUNC);
> }
>
> +static inline bool nested_cpu_has_eptp_switching(struct vmcs12 *vmcs12)
> +{
> + return nested_cpu_has_vmfunc(vmcs12) &&
> + (vmcs12->vm_function_control &
> + VMX_VMFUNC_EPTP_SWITCHING);
> +}
> +
> static inline bool is_nmi(u32 intr_info)
> {
> return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
> @@ -2791,7 +2800,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
> if (cpu_has_vmx_vmfunc()) {
> vmx->nested.nested_vmx_secondary_ctls_high |=
> SECONDARY_EXEC_ENABLE_VMFUNC;
> - vmx->nested.nested_vmx_vmfunc_controls = 0;
> + /*
> + * Advertise EPTP switching unconditionally
> + * since we emulate it
> + */
> + vmx->nested.nested_vmx_vmfunc_controls =
> + VMX_VMFUNC_EPTP_SWITCHING;
> }
>
> /*
> @@ -7772,6 +7786,9 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu)
> struct vcpu_vmx *vmx = to_vmx(vcpu);
> struct vmcs12 *vmcs12;
> u32 function = vcpu->arch.regs[VCPU_REGS_RAX];
> + u32 index = vcpu->arch.regs[VCPU_REGS_RCX];
> + struct page *page = NULL;
> + u64 *l1_eptp_list;
>
> /*
> * VMFUNC is only supported for nested guests, but we always enable the
> @@ -7784,11 +7801,43 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu)
> }
>
> vmcs12 = get_vmcs12(vcpu);
> - if ((vmcs12->vm_function_control & (1 << function)) == 0)
> + if (((vmcs12->vm_function_control & (1 << function)) == 0) ||
> + WARN_ON_ONCE(function))
> + goto fail;
> +
> + if (!nested_cpu_has_ept(vmcs12) ||
> + !nested_cpu_has_eptp_switching(vmcs12))
> + goto fail;
> +
> + if (!vmcs12->eptp_list_address || index >= VMFUNC_EPTP_ENTRIES)
> + goto fail;
> +
> + page = nested_get_page(vcpu, vmcs12->eptp_list_address);
> + if (!page)
> + goto fail;
> +
> + l1_eptp_list = kmap(page);
> + if (!l1_eptp_list[index])
> goto fail;
> - WARN(1, "VMCS12 VM function control should have been zero");
> +
> + /*
> + * If the (L2) guest does a vmfunc to the currently
> + * active ept pointer, we don't have to do anything else
> + */
> + if (vmcs12->ept_pointer != l1_eptp_list[index]) {
> + kvm_mmu_unload(vcpu);
> + /*
> + * TODO: Verify that guest ept satisfies vmentry prereqs
> + */
> + vmcs12->ept_pointer = l1_eptp_list[index];
> + kvm_mmu_reload(vcpu);
> + kunmap(page);
> + }
Missing nested_release_page_clean(page), both here (on the success path, before the return) and at the "fail" label.
The TODO is a symptom of a bigger problem, so I guess it's okay for now.
Paolo
> + return kvm_skip_emulated_instruction(vcpu);
>
> fail:
> + if (page)
> + kunmap(page);
> nested_vmx_vmexit(vcpu, vmx->exit_reason,
> vmcs_read32(VM_EXIT_INTR_INFO),
> vmcs_readl(EXIT_QUALIFICATION));
>