[PATCH 2/2] KVM: x86/mmu: Ensure TLBs are flushed when yielding during NX zapping

From: Sean Christopherson
Date: Fri Mar 19 2021 - 19:21:18 EST


Fix two intertwined bugs in the NX huge page zapping that were introduced
by the incorporation of the TDP MMU. Because there is a unified list of
NX huge pages, zapping can encounter both TDP MMU and legacy MMU pages,
and the two MMUs have different tracking for TLB flushing. If one flavor
needs a flush, but the code for the other flavor yields, KVM will fail to
flush before yielding.
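
For reference, the recovery loop prior to this patch looks roughly like the
snippet below (condensed from the removed lines in the diff); the TDP MMU's
"flush needed" return is ignored, and the TDP MMU is never told that legacy
zaps may already be pending a flush:

        if (is_tdp_mmu_page(sp)) {
                /* "flush needed" return value is ignored */
                kvm_tdp_mmu_zap_gfn_range(kvm, sp->gfn,
                        sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level));
        } else {
                kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
        }

        if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
                /* flushes only if invalid_list is non-empty */
                kvm_mmu_commit_zap_page(kvm, &invalid_list);
                cond_resched_rwlock_write(&kvm->mmu_lock);
        }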

First, honor the "flush needed" return from kvm_tdp_mmu_zap_gfn_range(),
which does the flush itself if and only if it yields, and otherwise
expects the caller to do the flush. This requires feeding the result
into kvm_mmu_remote_flush_or_zap(), and so also fixes the case where the
TDP MMU needs a flush, the legacy MMU does not, and the main loop yields.
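
In the recovery loop this means keeping the returned flush state and handing
it to kvm_mmu_remote_flush_or_zap() at the yield points, roughly (condensed
from the hunk below, with the new fourth argument elided):

        flush = kvm_tdp_mmu_zap_gfn_range(kvm, sp->gfn, gfn_end, ...);

        if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
                /* flush and/or commit the legacy zaps before yielding */
                kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
                cond_resched_rwlock_write(&kvm->mmu_lock);
                flush = false;
        }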

Second, tell the TDP MMU a flush is pending if the list of zapped pages
from legacy MMUs is not empty, i.e. the legacy MMU needs a flush. This
fixes the case where the TDP MMU yields, but does not itself require a
flush.
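
Concretely, the TDP MMU zap is seeded with the legacy MMU's pending state so
that, if it yields, it also flushes on the legacy MMU's behalf (condensed
from the hunk below):

        /* a non-empty invalid_list means the legacy MMU needs a flush */
        flush = kvm_tdp_mmu_zap_gfn_range(kvm, sp->gfn, gfn_end,
                                          flush || !list_empty(&invalid_list));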

Fixes: 29cf0f5007a2 ("kvm: x86/mmu: NX largepage recovery for TDP MMU")
Cc: stable@xxxxxxxxxxxxxxx
Cc: Ben Gardon <bgardon@xxxxxxxxxx>
Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---
 arch/x86/kvm/mmu/mmu.c     | 15 ++++++++++-----
 arch/x86/kvm/mmu/tdp_mmu.c |  6 +++---
 arch/x86/kvm/mmu/tdp_mmu.h |  3 ++-
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index c6ed633594a2..413d6259340e 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5517,7 +5517,8 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
         }

         if (is_tdp_mmu_enabled(kvm)) {
-                flush = kvm_tdp_mmu_zap_gfn_range(kvm, gfn_start, gfn_end);
+                flush = kvm_tdp_mmu_zap_gfn_range(kvm, gfn_start, gfn_end,
+                                                  false);
                 if (flush)
                         kvm_flush_remote_tlbs(kvm);
         }
@@ -5939,6 +5940,8 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
         struct kvm_mmu_page *sp;
         unsigned int ratio;
         LIST_HEAD(invalid_list);
+        bool flush = false;
+        gfn_t gfn_end;
         ulong to_zap;

         rcu_idx = srcu_read_lock(&kvm->srcu);
@@ -5960,19 +5963,21 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
                                       lpage_disallowed_link);
                 WARN_ON_ONCE(!sp->lpage_disallowed);
                 if (is_tdp_mmu_page(sp)) {
-                        kvm_tdp_mmu_zap_gfn_range(kvm, sp->gfn,
-                                sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level));
+                        gfn_end = sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level);
+                        flush = kvm_tdp_mmu_zap_gfn_range(kvm, sp->gfn, gfn_end,
+                                                          flush || !list_empty(&invalid_list));
                 } else {
                         kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
                         WARN_ON_ONCE(sp->lpage_disallowed);
                 }

                 if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
-                        kvm_mmu_commit_zap_page(kvm, &invalid_list);
+                        kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
                         cond_resched_rwlock_write(&kvm->mmu_lock);
+                        flush = false;
                 }
         }
-        kvm_mmu_commit_zap_page(kvm, &invalid_list);
+        kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);

         write_unlock(&kvm->mmu_lock);
         srcu_read_unlock(&kvm->srcu, rcu_idx);
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 6cf08c3c537f..367f12bf1026 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -709,10 +709,10 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
  * SPTEs have been cleared and a TLB flush is needed before releasing the
  * MMU lock.
  */
-bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end)
+bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end,
+                               bool flush)
 {
         struct kvm_mmu_page *root;
-        bool flush = false;

         for_each_tdp_mmu_root_yield_safe(kvm, root)
                 flush = zap_gfn_range(kvm, root, start, end, true, flush);
@@ -725,7 +725,7 @@ void kvm_tdp_mmu_zap_all(struct kvm *kvm)
         gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
         bool flush;

-        flush = kvm_tdp_mmu_zap_gfn_range(kvm, 0, max_gfn);
+        flush = kvm_tdp_mmu_zap_gfn_range(kvm, 0, max_gfn, false);
         if (flush)
                 kvm_flush_remote_tlbs(kvm);
 }
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index 3b761c111bff..e39bee52d49e 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -8,7 +8,8 @@
 hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu);
 void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root);

-bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end);
+bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end,
+                               bool flush);
 void kvm_tdp_mmu_zap_all(struct kvm *kvm);

 int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
--
2.31.0.rc2.261.g7f71774620-goog