Re: [PATCH 3/3] KVM: x86: fix use of L1 MMIO areas in nested guests
From: David Hildenbrand
Date: Thu Aug 17 2017 - 04:11:15 EST
On 11.08.2017 18:52, Paolo Bonzini wrote:
> There is currently some confusion between nested and L1 GPAs. The
> assignment to "direct" in kvm_mmu_page_fault tries to fix that, but
> it is not enough. What this patch does is fence off the MMIO cache
> completely when using shadow nested page tables, since we have neither
> a GVA nor an L1 GPA to put in the cache. This also allows some
> simplifications in kvm_mmu_page_fault and FNAME(page_fault).
>
> The EPT misconfig likewise does not have an L1 GPA to pass to
> kvm_io_bus_write, so that must be skipped for guest mode.
The complexity of the MMU and such non-trivial corner cases scare me
every time :)
>
> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> ---
> arch/x86/kvm/mmu.c | 10 +++++++++-
> arch/x86/kvm/paging_tmpl.h | 3 +--
> arch/x86/kvm/vmx.c | 12 +++++++++---
> arch/x86/kvm/x86.h | 6 +++++-
> 4 files changed, 24 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index f5c3f8e7d29f..f3665947bcc5 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -3598,6 +3598,14 @@ static bool is_shadow_zero_bits_set(struct kvm_mmu *mmu, u64 spte, int level)
>
> static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct)
> {
> + /*
> + * A nested guest cannot use the MMIO cache if it is using nested
> + * page tables, because cr2 is a nGPA while the cache stores L1's
> + * physical addresses.
> + */
> + if (mmu_is_nested(vcpu))
> + return false;
> +
> if (direct)
> return vcpu_match_mmio_gpa(vcpu, addr);
>
> @@ -4827,7 +4835,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
> {
> int r, emulation_type = EMULTYPE_RETRY;
> enum emulation_result er;
> - bool direct = vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu);
> + bool direct = vcpu->arch.mmu.direct_map;
>
> /*
> * With shadow page tables, fault_address contains a GVA
> diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
> index 3bb90ceeb52d..86b68dc5a649 100644
> --- a/arch/x86/kvm/paging_tmpl.h
> +++ b/arch/x86/kvm/paging_tmpl.h
> @@ -790,8 +790,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
> &map_writable))
> return 0;
>
> - if (handle_abnormal_pfn(vcpu, mmu_is_nested(vcpu) ? 0 : addr,
> - walker.gfn, pfn, walker.pte_access, &r))
> + if (handle_abnormal_pfn(vcpu, addr, walker.gfn, pfn, walker.pte_access, &r))
> return r;
>
> /*
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 79efb00dd70d..e3989461f938 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -6402,10 +6402,16 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
> int ret;
> gpa_t gpa;
>
> + /*
> + * A nested guest cannot optimize MMIO vmexits, because we have an
> + * nGPA here instead of the required GPA.
> + */
> gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
> - if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
> - trace_kvm_fast_mmio(gpa);
> - return kvm_skip_emulated_instruction(vcpu);
> + if (!is_guest_mode(vcpu)) {
> + if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
if (!is_guest_mode(vcpu) &&
!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL) ...
could be done instead, so that the following code would not need to be changed.
> + trace_kvm_fast_mmio(gpa);
> + return kvm_skip_emulated_instruction(vcpu);
> + }
> }
>
> ret = kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 612067074905..2383d2ce0a84 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -90,7 +90,11 @@ static inline u32 bit(int bitno)
> static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
> gva_t gva, gfn_t gfn, unsigned access)
> {
> - vcpu->arch.mmio_gva = gva & PAGE_MASK;
> + /*
> + * If this is a shadow nested page table, the "GVA" is
> + * actually a nested GPA.
nGPA? (to stick to the terminology used in the other comments)
> + */
> + vcpu->arch.mmio_gva = mmu_is_nested(vcpu) ? 0 : gva & PAGE_MASK;
> vcpu->arch.access = access;
> vcpu->arch.mmio_gfn = gfn;
> vcpu->arch.mmio_gen = kvm_memslots(vcpu->kvm)->generation;
>
--
Thanks,
David