[PATCH 2/2] KVM: Don't take mmu_lock for range invalidation unless necessary

From: Paolo Bonzini
Date: Tue Jul 27 2021 - 13:18:19 EST


From: Sean Christopherson <seanjc@xxxxxxxxxx>

Avoid taking mmu_lock for .invalidate_range_{start,end}() notifications
that are unrelated to KVM, i.e. whose range does not overlap any memslot.
This is possible now that memslot updates are blocked from range_start()
to range_end(); that guarantees that lock elision happens in both
range_start() and range_end() or in neither, and therefore that
mmu_notifier_count updates (which must occur while holding mmu_lock for
write) are always paired across start->end.
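
For illustration, the resulting flow in the hva-range handler is roughly
the following (a minimal sketch with made-up stand-in types and helpers
-- slot, range, handle_range(), lock()/unlock() -- not the real
kvm_main.c structures):

#include <stdbool.h>	/* for a standalone build; kernel code has bool */

struct slot { unsigned long start, end; };

struct range {
	unsigned long start, end;
	void (*on_lock)(unsigned long start, unsigned long end);
	bool (*handler)(const struct slot *slot);
};

static bool handle_range(const struct range *range,
			 const struct slot *slots, unsigned long nr,
			 void (*lock)(void), void (*unlock)(void))
{
	bool locked = false, ret = false;
	unsigned long i;

	for (i = 0; i < nr; i++) {
		/* Skip slots that do not overlap the notified range. */
		if (slots[i].end <= range->start ||
		    slots[i].start >= range->end)
			continue;

		/*
		 * Take the lock lazily, on the first overlapping slot,
		 * and run on_lock() under that first acquisition.
		 */
		if (!locked) {
			locked = true;
			lock();
			if (range->on_lock)
				range->on_lock(range->start, range->end);
			if (!range->handler)
				break;
		}
		ret |= range->handler(&slots[i]);
	}

	if (locked)
		unlock();

	return ret;
}

A range that overlaps no slot never takes the lock at all, which is
exactly the case hit by notifications that are unrelated to KVM.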

Based on patches originally written by Ben Gardon.

Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
---
virt/kvm/kvm_main.c | 24 +++++++++++-------------
1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index c64a7de60846..a96cbe24c688 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -496,17 +496,6 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,

idx = srcu_read_lock(&kvm->srcu);

- /* The on_lock() path does not yet support lock elision. */
- if (!IS_KVM_NULL_FN(range->on_lock)) {
- locked = true;
- KVM_MMU_LOCK(kvm);
-
- range->on_lock(kvm, range->start, range->end);
-
- if (IS_KVM_NULL_FN(range->handler))
- goto out_unlock;
- }
-
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
slots = __kvm_memslots(kvm, i);
kvm_for_each_memslot(slot, slots) {
@@ -538,6 +527,10 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
if (!locked) {
locked = true;
KVM_MMU_LOCK(kvm);
+ if (!IS_KVM_NULL_FN(range->on_lock))
+ range->on_lock(kvm, range->start, range->end);
+ if (IS_KVM_NULL_FN(range->handler))
+ break;
}
ret |= range->handler(kvm, &gfn_range);
}
@@ -546,7 +539,6 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
if (range->flush_on_ret && (ret || kvm->tlbs_dirty))
kvm_flush_remote_tlbs(kvm);

-out_unlock:
if (locked)
KVM_MMU_UNLOCK(kvm);

@@ -605,8 +597,13 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,

/*
* .change_pte() must be surrounded by .invalidate_range_{start,end}(),
+ * so if mmu_notifier_count is zero, then start() did not find a
+ * relevant memslot and was not forced down the slow (mmu_lock) path
+ * for this range; rechecking the memslots here is unnecessary.
*/
WARN_ON_ONCE(!READ_ONCE(kvm->mn_active_invalidate_count));
+ if (!READ_ONCE(kvm->mmu_notifier_count))
+ return;

kvm_handle_hva_range(mn, address, address + 1, pte, kvm_set_spte_gfn);
}
@@ -1398,7 +1395,8 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,

/*
* Do not store the new memslots while there are invalidations in
- * progress (preparatory change for the next commit).
+ * progress, otherwise the locking in invalidate_range_start and
+ * invalidate_range_end will be unbalanced.
*/
spin_lock(&kvm->mn_invalidate_lock);
prepare_to_rcuwait(&kvm->mn_memslots_update_rcuwait);
--
2.27.0