Re: [PATCH 2/4] KVM: x86/mmu: Get root level from walkers when retrieving MMIO SPTE
From: Vitaly Kuznetsov
Date: Fri Dec 18 2020 - 04:11:54 EST
Sean Christopherson <seanjc@xxxxxxxxxx> writes:
> Get the so called "root" level from the low level shadow page table
> walkers instead of manually attempting to calculate it higher up the
> stack, e.g. in get_mmio_spte(). When KVM is using PAE shadow paging,
> the starting level of the walk, from the callers perspective, is not
> the CR3 root but rather the PDPTR "root". Checking for reserved bits
> from the CR3 root causes get_mmio_spte() to consume uninitialized stack
> data due to indexing into sptes[] for a level that was not filled by
> get_walk(). This can result in false positives and/or negatives
> depending on what garbage happens to be on the stack.
>
> Opportunistically nuke a few extra newlines.
>
> Fixes: 95fb5b0258b7 ("kvm: x86/mmu: Support MMIO in the TDP MMU")
> Reported-by: Richard Herbert <rherbert@xxxxxxxxxxxx>
> Cc: Ben Gardon <bgardon@xxxxxxxxxx>
> Cc: stable@xxxxxxxxxxxxxxx
> Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
> ---
> arch/x86/kvm/mmu/mmu.c | 15 ++++++---------
> arch/x86/kvm/mmu/tdp_mmu.c | 5 ++++-
> arch/x86/kvm/mmu/tdp_mmu.h | 4 +++-
> 3 files changed, 13 insertions(+), 11 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index a48cd12c01d7..52f36c879086 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -3485,16 +3485,16 @@ static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct)
> * Return the level of the lowest level SPTE added to sptes.
> * That SPTE may be non-present.
> */
> -static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes)
> +static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level)
> {
> struct kvm_shadow_walk_iterator iterator;
> int leaf = -1;
> u64 spte;
>
> -
> walk_shadow_page_lockless_begin(vcpu);
>
> - for (shadow_walk_init(&iterator, vcpu, addr);
> + for (shadow_walk_init(&iterator, vcpu, addr),
> + *root_level = iterator.level;
> shadow_walk_okay(&iterator);
> __shadow_walk_next(&iterator, spte)) {
> leaf = iterator.level;
> @@ -3504,7 +3504,6 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes)
>
> if (!is_shadow_present_pte(spte))
> break;
> -
> }
>
> walk_shadow_page_lockless_end(vcpu);
> @@ -3517,9 +3516,7 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
> {
> u64 sptes[PT64_ROOT_MAX_LEVEL];
> struct rsvd_bits_validate *rsvd_check;
> - int root = vcpu->arch.mmu->shadow_root_level;
> - int leaf;
> - int level;
> + int root, leaf, level;
> bool reserved = false;
Personal taste: I would've renamed 'root' to 'root_level' (to be
consistent with get_walk()/kvm_tdp_mmu_get_walk()) and 'level' to
e.g. 'l' as it's only being used as an interator ('i' would also do).
>
> if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) {
> @@ -3528,9 +3525,9 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
> }
>
> if (is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa))
> - leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes);
> + leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, &root);
> else
> - leaf = get_walk(vcpu, addr, sptes);
> + leaf = get_walk(vcpu, addr, sptes, &root);
>
> if (unlikely(leaf < 0)) {
> *sptep = 0ull;
> diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> index 50cec7a15ddb..a4f9447f8327 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.c
> +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> @@ -1148,13 +1148,16 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
> * Return the level of the lowest level SPTE added to sptes.
> * That SPTE may be non-present.
> */
> -int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes)
> +int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
> + int *root_level)
> {
> struct tdp_iter iter;
> struct kvm_mmu *mmu = vcpu->arch.mmu;
> gfn_t gfn = addr >> PAGE_SHIFT;
> int leaf = -1;
>
> + *root_level = vcpu->arch.mmu->shadow_root_level;
> +
> tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
> leaf = iter.level;
> sptes[leaf - 1] = iter.old_spte;
> diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
> index 556e065503f6..cbbdbadd1526 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.h
> +++ b/arch/x86/kvm/mmu/tdp_mmu.h
> @@ -44,5 +44,7 @@ void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
> bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
> struct kvm_memory_slot *slot, gfn_t gfn);
>
> -int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes);
> +int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
> + int *root_level);
> +
> #endif /* __KVM_X86_MMU_TDP_MMU_H */
Reviewed-by: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx>
--
Vitaly