Re: [RFC PATCH v12 02/33] KVM: Use gfn instead of hva for mmu_notifier_retry

From: Binbin Wu
Date: Wed Sep 13 2023 - 23:07:34 EST




On 9/14/2023 9:55 AM, Sean Christopherson wrote:
From: Chao Peng <chao.p.peng@xxxxxxxxxxxxxxx>

Currently in mmu_notifier invalidate path, hva range is recorded and
then checked against by mmu_notifier_retry_hva() in the page fault
handling path. However, for the to be introduced private memory, a page
fault may not have a hva associated, checking gfn(gpa) makes more sense.

For existing hva based shared memory, gfn is expected to also work. The
only downside is when aliasing multiple gfns to a single hva, the
current algorithm of checking multiple ranges could result in a much
larger range being rejected. Such aliasing should be uncommon, so the
impact is expected small.

Suggested-by: Sean Christopherson <seanjc@xxxxxxxxxx>
Signed-off-by: Chao Peng <chao.p.peng@xxxxxxxxxxxxxxx>
Reviewed-by: Fuad Tabba <tabba@xxxxxxxxxx>
Tested-by: Fuad Tabba <tabba@xxxxxxxxxx>
[sean: convert vmx_set_apic_access_page_addr() to gfn-based API]
Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---
arch/x86/kvm/mmu/mmu.c | 10 ++++++----
arch/x86/kvm/vmx/vmx.c | 11 +++++------
include/linux/kvm_host.h | 33 +++++++++++++++++++++------------
virt/kvm/kvm_main.c | 40 +++++++++++++++++++++++++++++++---------
4 files changed, 63 insertions(+), 31 deletions(-)

[...]
-void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start,
- unsigned long end)
+void kvm_mmu_invalidate_begin(struct kvm *kvm)
{
+ lockdep_assert_held_write(&kvm->mmu_lock);
/*
* The count increase must become visible at unlock time as no
* spte can be established without taking the mmu_lock and
* count is also read inside the mmu_lock critical section.
*/
kvm->mmu_invalidate_in_progress++;
+
+ if (likely(kvm->mmu_invalidate_in_progress == 1))
+ kvm->mmu_invalidate_range_start = INVALID_GPA;
+}
+
+void kvm_mmu_invalidate_range_add(struct kvm *kvm, gfn_t start, gfn_t end)
+{
+ lockdep_assert_held_write(&kvm->mmu_lock);
+
+ WARN_ON_ONCE(!kvm->mmu_invalidate_in_progress);
+
if (likely(kvm->mmu_invalidate_in_progress == 1)) {
kvm->mmu_invalidate_range_start = start;
kvm->mmu_invalidate_range_end = end;
@@ -771,6 +781,12 @@ void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start,
}
}
+static bool kvm_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+ kvm_mmu_invalidate_range_add(kvm, range->start, range->end);
+ return kvm_unmap_gfn_range(kvm, range);
+}
+
static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
const struct mmu_notifier_range *range)
{
@@ -778,7 +794,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
const struct kvm_mmu_notifier_range hva_range = {
.start = range->start,
.end = range->end,
- .handler = kvm_unmap_gfn_range,
+ .handler = kvm_mmu_unmap_gfn_range,
.on_lock = kvm_mmu_invalidate_begin,
.on_unlock = kvm_arch_guest_memory_reclaimed,
.flush_on_ret = true,
@@ -817,8 +833,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
return 0;
}
-void kvm_mmu_invalidate_end(struct kvm *kvm, unsigned long start,
- unsigned long end)
+void kvm_mmu_invalidate_end(struct kvm *kvm)
{
/*
* This sequence increase will notify the kvm page fault that
@@ -833,6 +848,13 @@ void kvm_mmu_invalidate_end(struct kvm *kvm, unsigned long start,
* in conjunction with the smp_rmb in mmu_invalidate_retry().
*/
kvm->mmu_invalidate_in_progress--;
+
+ /*
+ * Assert that at least one range must be added between start() and
+ * end(). Not adding a range isn't fatal, but it is a KVM bug.
+ */
+ WARN_ON_ONCE(kvm->mmu_invalidate_in_progress &&
+ kvm->mmu_invalidate_range_start == INVALID_GPA);
Should the check happen before the decrease of kvm->mmu_invalidate_in_progress?
Otherwise, KVM calls kvm_mmu_invalidate_begin(), then kvm_mmu_invalidate_end()
the check will not take effect.

}
static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,