[PATCH v6 2/4] KVM: x86/mmu: use gfn_to_pfn_noref

From: David Stevens
Date: Thu Mar 30 2023 - 04:58:30 EST


From: David Stevens <stevensd@xxxxxxxxxxxx>

Switch the x86 MMU to the new gfn_to_pfn_noref functions. This allows IO
and PFNMAP mappings that are backed by valid struct pages but are not
refcounted (e.g. tail pages of non-compound higher-order allocations)
to be mapped into the guest.
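
As a rough sketch of the resulting calling convention (the noref helpers
themselves are introduced in patch 1/4 of this series, so the contract
described below is inferred from how the call sites in this patch use
them; example_access_gfn is a hypothetical caller, for illustration only):

/* Assumes <linux/kvm_host.h>; hypothetical caller, not part of this patch. */
static int example_access_gfn(struct kvm *kvm, gfn_t gfn)
{
	struct page *page;
	kvm_pfn_t pfn;

	/*
	 * Assumed contract: page is set to NULL whenever the returned
	 * pfn is not backed by a refcounted struct page.
	 */
	pfn = gfn_to_pfn_noref(kvm, gfn, &page);
	if (is_error_noslot_pfn(pfn))
		return -EFAULT;

	/* ... use pfn, e.g. install it in an SPTE ... */

	/* Drops a reference only if one was actually taken (page != NULL). */
	kvm_release_pfn_noref_clean(pfn, page);
	return 0;
}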

Signed-off-by: David Stevens <stevensd@xxxxxxxxxxxx>
---
 arch/x86/kvm/mmu/mmu.c          | 19 ++++++++++---------
 arch/x86/kvm/mmu/mmu_internal.h |  1 +
 arch/x86/kvm/mmu/paging_tmpl.h  |  7 ++++---
 arch/x86/kvm/x86.c              |  5 +++--
 4 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 144c5a01cd77..86b74e7bccfa 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3114,7 +3114,7 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 	if (unlikely(fault->max_level == PG_LEVEL_4K))
 		return;
 
-	if (is_error_noslot_pfn(fault->pfn))
+	if (!fault->page)
 		return;
 
 	if (kvm_slot_dirty_track_enabled(slot))
@@ -4224,6 +4224,7 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 	if (is_guest_mode(vcpu)) {
 		fault->slot = NULL;
 		fault->pfn = KVM_PFN_NOSLOT;
+		fault->page = NULL;
 		fault->map_writable = false;
 		return RET_PF_CONTINUE;
 	}
@@ -4239,9 +4240,9 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 	}
 
 	async = false;
-	fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, false, &async,
-					  fault->write, &fault->map_writable,
-					  &fault->hva);
+	fault->pfn = __gfn_to_pfn_noref_memslot(slot, fault->gfn, false, false, &async,
+						fault->write, &fault->map_writable,
+						&fault->hva, &fault->page);
 	if (!async)
 		return RET_PF_CONTINUE; /* *pfn has correct page already */

@@ -4261,9 +4262,9 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 	 * to wait for IO. Note, gup always bails if it is unable to quickly
 	 * get a page and a fatal signal, i.e. SIGKILL, is pending.
 	 */
-	fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, true, NULL,
-					  fault->write, &fault->map_writable,
-					  &fault->hva);
+	fault->pfn = __gfn_to_pfn_noref_memslot(slot, fault->gfn, false, true, NULL,
+						fault->write, &fault->map_writable,
+						&fault->hva, &fault->page);
 	return RET_PF_CONTINUE;
 }

@@ -4349,7 +4350,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault

 out_unlock:
 	write_unlock(&vcpu->kvm->mmu_lock);
-	kvm_release_pfn_clean(fault->pfn);
+	kvm_release_pfn_noref_clean(fault->pfn, fault->page);
 	return r;
 }

@@ -4427,7 +4428,7 @@ static int kvm_tdp_mmu_page_fault(struct kvm_vcpu *vcpu,

 out_unlock:
 	read_unlock(&vcpu->kvm->mmu_lock);
-	kvm_release_pfn_clean(fault->pfn);
+	kvm_release_pfn_noref_clean(fault->pfn, fault->page);
 	return r;
 }
 #endif
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index 2cbb155c686c..6ee34a2d0e13 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -239,6 +239,7 @@ struct kvm_page_fault {
 	unsigned long mmu_seq;
 	kvm_pfn_t pfn;
 	hva_t hva;
+	struct page *page;
 	bool map_writable;

/*
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index a056f2773dd9..e4e54e372721 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -525,6 +525,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 	unsigned pte_access;
 	gfn_t gfn;
 	kvm_pfn_t pfn;
+	struct page *page;
 
 	if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
 		return false;
@@ -540,12 +541,12 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 	if (!slot)
 		return false;
 
-	pfn = gfn_to_pfn_memslot_atomic(slot, gfn);
+	pfn = gfn_to_pfn_noref_memslot_atomic(slot, gfn, &page);
 	if (is_error_pfn(pfn))
 		return false;
 
 	mmu_set_spte(vcpu, slot, spte, pte_access, gfn, pfn, NULL);
-	kvm_release_pfn_clean(pfn);
+	kvm_release_pfn_noref_clean(pfn, page);
 	return true;
 }

@@ -830,7 +831,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault

 out_unlock:
 	write_unlock(&vcpu->kvm->mmu_lock);
-	kvm_release_pfn_clean(fault->pfn);
+	kvm_release_pfn_noref_clean(fault->pfn, fault->page);
 	return r;
 }

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 237c483b1230..53a8c9e776e5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8458,6 +8458,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 {
 	gpa_t gpa = cr2_or_gpa;
 	kvm_pfn_t pfn;
+	struct page *page;
 
 	if (!(emulation_type & EMULTYPE_ALLOW_RETRY_PF))
 		return false;
@@ -8487,7 +8488,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 	 * retry instruction -> write #PF -> emulation fail -> retry
 	 * instruction -> ...
 	 */
-	pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
+	pfn = gfn_to_pfn_noref(vcpu->kvm, gpa_to_gfn(gpa), &page);
 
 	/*
 	 * If the instruction failed on the error pfn, it can not be fixed,
@@ -8496,7 +8497,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 	if (is_error_noslot_pfn(pfn))
 		return false;
 
-	kvm_release_pfn_clean(pfn);
+	kvm_release_pfn_noref_clean(pfn, page);
 
 	/* The instructions are well-emulated on direct mmu. */
 	if (vcpu->arch.mmu->root_role.direct) {
--
2.40.0.348.gf938b09366-goog