[PATCH v4 4/7] KVM: Move MMU lock acquisition for test/clear_young to architecture

From: James Houghton
Date: Wed May 29 2024 - 14:06:50 EST


For the implementation of mmu_notifier_{test,clear}_young, the KVM
memslot walker used to take the MMU lock for us. Now make the
architectures take it themselves.

Don't relax locking for any architecture except powerpc e500; its
implementations of kvm_age_gfn and kvm_test_age_gfn simply return false,
so there is no need to grab the KVM MMU lock.

Signed-off-by: James Houghton <jthoughton@xxxxxxxxxx>
---
arch/arm64/kvm/mmu.c | 30 ++++++++++++++++++++++--------
arch/loongarch/kvm/mmu.c | 20 +++++++++++++++-----
arch/mips/kvm/mmu.c | 21 ++++++++++++++++-----
arch/powerpc/kvm/book3s.c | 14 ++++++++++++--
arch/riscv/kvm/mmu.c | 26 ++++++++++++++++++++------
arch/x86/kvm/mmu/mmu.c | 8 ++++++++
virt/kvm/kvm_main.c | 4 ++--
7 files changed, 95 insertions(+), 28 deletions(-)

diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 8bcab0cc3fe9..8337009dde77 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1773,25 +1773,39 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
u64 size = (range->end - range->start) << PAGE_SHIFT;
+ bool young = false;
+
+ write_lock(&kvm->mmu_lock);

if (!kvm->arch.mmu.pgt)
- return false;
+ goto out;

- return kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt,
- range->start << PAGE_SHIFT,
- size, true);
+ young = kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt,
+ range->start << PAGE_SHIFT,
+ size, true);
+
+out:
+ write_unlock(&kvm->mmu_lock);
+ return young;
}

bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
u64 size = (range->end - range->start) << PAGE_SHIFT;
+ bool young = false;
+
+ write_lock(&kvm->mmu_lock);

if (!kvm->arch.mmu.pgt)
- return false;
+ goto out;

- return kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt,
- range->start << PAGE_SHIFT,
- size, false);
+ young = kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt,
+ range->start << PAGE_SHIFT,
+ size, false);
+
+out:
+ write_unlock(&kvm->mmu_lock);
+ return young;
}

phys_addr_t kvm_mmu_get_httbr(void)
diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c
index 98883aa23ab8..5eb262bcf6b0 100644
--- a/arch/loongarch/kvm/mmu.c
+++ b/arch/loongarch/kvm/mmu.c
@@ -497,24 +497,34 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
kvm_ptw_ctx ctx;
+ bool young;
+
+ spin_lock(&kvm->mmu_lock);

ctx.flag = 0;
ctx.ops = kvm_mkold_pte;
kvm_ptw_prepare(kvm, &ctx);

- return kvm_ptw_top(kvm->arch.pgd, range->start << PAGE_SHIFT,
+ young = kvm_ptw_top(kvm->arch.pgd, range->start << PAGE_SHIFT,
range->end << PAGE_SHIFT, &ctx);
+
+ spin_unlock(&kvm->mmu_lock);
+ return young;
}

bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
gpa_t gpa = range->start << PAGE_SHIFT;
- kvm_pte_t *ptep = kvm_populate_gpa(kvm, NULL, gpa, 0);
+ kvm_pte_t *ptep;
+ bool young;

- if (ptep && kvm_pte_present(NULL, ptep) && kvm_pte_young(*ptep))
- return true;
+ spin_lock(&kvm->mmu_lock);
+ ptep = kvm_populate_gpa(kvm, NULL, gpa, 0);

- return false;
+ young = ptep && kvm_pte_present(NULL, ptep) && kvm_pte_young(*ptep);
+
+ spin_unlock(&kvm->mmu_lock);
+ return young;
}

/*
diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c
index c17157e700c0..db3b7cf22db1 100644
--- a/arch/mips/kvm/mmu.c
+++ b/arch/mips/kvm/mmu.c
@@ -446,17 +446,28 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)

bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
- return kvm_mips_mkold_gpa_pt(kvm, range->start, range->end);
+ bool young;
+
+ spin_lock(&kvm->mmu_lock);
+ young = kvm_mips_mkold_gpa_pt(kvm, range->start, range->end);
+ spin_unlock(&kvm->mmu_lock);
+ return young;
}

bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
gpa_t gpa = range->start << PAGE_SHIFT;
- pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
+ pte_t *gpa_pte;
+ bool young = false;

- if (!gpa_pte)
- return false;
- return pte_young(*gpa_pte);
+ spin_lock(&kvm->mmu_lock);
+ gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
+
+ if (gpa_pte)
+ young = pte_young(*gpa_pte);
+
+ spin_unlock(&kvm->mmu_lock);
+ return young;
}

/**
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index ff6c38373957..f503ab9ac3a5 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -887,12 +887,22 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)

bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
- return kvm->arch.kvm_ops->age_gfn(kvm, range);
+ bool young;
+
+ spin_lock(&kvm->mmu_lock);
+ young = kvm->arch.kvm_ops->age_gfn(kvm, range);
+ spin_unlock(&kvm->mmu_lock);
+ return young;
}

bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
- return kvm->arch.kvm_ops->test_age_gfn(kvm, range);
+ bool young;
+
+ spin_lock(&kvm->mmu_lock);
+ young = kvm->arch.kvm_ops->test_age_gfn(kvm, range);
+ spin_unlock(&kvm->mmu_lock);
+ return young;
}

int kvmppc_core_init_vm(struct kvm *kvm)
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index b63650f9b966..c78abe8041fb 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -555,17 +555,24 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
pte_t *ptep;
u32 ptep_level = 0;
u64 size = (range->end - range->start) << PAGE_SHIFT;
+ bool young = false;
+
+ spin_lock(&kvm->mmu_lock);

if (!kvm->arch.pgd)
- return false;
+ goto out;

WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE);

if (!gstage_get_leaf_entry(kvm, range->start << PAGE_SHIFT,
&ptep, &ptep_level))
- return false;
+ goto out;
+
+ young = ptep_test_and_clear_young(NULL, 0, ptep);

- return ptep_test_and_clear_young(NULL, 0, ptep);
+out:
+ spin_unlock(&kvm->mmu_lock);
+ return young;
}

bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
@@ -573,17 +580,24 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
pte_t *ptep;
u32 ptep_level = 0;
u64 size = (range->end - range->start) << PAGE_SHIFT;
+ bool young = false;
+
+ spin_lock(&kvm->mmu_lock);

if (!kvm->arch.pgd)
- return false;
+ goto out;

WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE);

if (!gstage_get_leaf_entry(kvm, range->start << PAGE_SHIFT,
&ptep, &ptep_level))
- return false;
+ goto out;
+
+ young = pte_young(ptep_get(ptep));

- return pte_young(ptep_get(ptep));
+out:
+ spin_unlock(&kvm->mmu_lock);
+ return young;
}

int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 662f62dfb2aa..6a2a557c2c31 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1630,12 +1630,16 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
bool young = false;

+ write_lock(&kvm->mmu_lock);
+
if (kvm_memslots_have_rmaps(kvm))
young = kvm_handle_gfn_range(kvm, range, kvm_age_rmap);

if (tdp_mmu_enabled)
young |= kvm_tdp_mmu_age_gfn_range(kvm, range);

+ write_unlock(&kvm->mmu_lock);
+
return young;
}

@@ -1643,12 +1647,16 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
bool young = false;

+ write_lock(&kvm->mmu_lock);
+
if (kvm_memslots_have_rmaps(kvm))
young = kvm_handle_gfn_range(kvm, range, kvm_test_age_rmap);

if (tdp_mmu_enabled)
young |= kvm_tdp_mmu_test_age_gfn(kvm, range);

+ write_unlock(&kvm->mmu_lock);
+
return young;
}

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d197b6725cb3..8d2d3acf18d8 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -901,7 +901,7 @@ static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn,
* more sophisticated heuristic later.
*/
return kvm_handle_hva_range_no_flush(mn, start, end,
- kvm_age_gfn, false);
+ kvm_age_gfn, true);
}

static int kvm_mmu_notifier_test_young(struct mmu_notifier *mn,
@@ -911,7 +911,7 @@ static int kvm_mmu_notifier_test_young(struct mmu_notifier *mn,
trace_kvm_test_age_hva(address);

return kvm_handle_hva_range_no_flush(mn, address, address + 1,
- kvm_test_age_gfn, false);
+ kvm_test_age_gfn, true);
}

static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
--
2.45.1.288.g0e0cd299f1-goog