Re: [PATCH v9 5/6] KVM: x86: Migrate to __kvm_follow_pfn
From: Maxim Levitsky
Date: Tue Oct 03 2023 - 12:56:09 EST
У пн, 2023-09-11 у 11:16 +0900, David Stevens пише:
> From: David Stevens <stevensd@xxxxxxxxxxxx>
>
> Migrate functions which need access to is_refcounted_page to
> __kvm_follow_pfn. The functions which need this are __kvm_faultin_pfn
> and reexecute_instruction. The former requires replacing the async
> in/out parameter with FOLL_NOWAIT parameter and the KVM_PFN_ERR_NEEDS_IO
> return value. Handling non-refcounted pages is complicated, so it will
> be done in a followup. The latter is a straightforward refactor.
>
> APIC related callers do not need to migrate because KVM controls the
> memslot, so it will always be regular memory. Prefetch related callers
> do not need to be migrated because atomic gfn_to_pfn calls can never
> make it to hva_to_pfn_remapped.
>
> Signed-off-by: David Stevens <stevensd@xxxxxxxxxxxx>
> ---
> arch/x86/kvm/mmu/mmu.c | 43 ++++++++++++++++++++++++++++++++----------
> arch/x86/kvm/x86.c | 12 ++++++++++--
> 2 files changed, 43 insertions(+), 12 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index e1d011c67cc6..e1eca26215e2 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -4254,7 +4254,14 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
> static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> {
> struct kvm_memory_slot *slot = fault->slot;
> - bool async;
> + struct kvm_follow_pfn foll = {
> + .slot = slot,
> + .gfn = fault->gfn,
> + .flags = fault->write ? FOLL_WRITE : 0,
> + .try_map_writable = true,
> + .guarded_by_mmu_notifier = true,
> + .allow_non_refcounted_struct_page = false,
> + };
>
> /*
> * Retry the page fault if the gfn hit a memslot that is being deleted
> @@ -4283,12 +4290,20 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
> return RET_PF_EMULATE;
> }
>
> - async = false;
> - fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, false, &async,
> - fault->write, &fault->map_writable,
> - &fault->hva);
> - if (!async)
> - return RET_PF_CONTINUE; /* *pfn has correct page already */
> + foll.flags |= FOLL_NOWAIT;
> + fault->pfn = __kvm_follow_pfn(&foll);
> +
> + if (!is_error_noslot_pfn(fault->pfn))
> + goto success;
Unrelated but I can't say I like the 'is_error_noslot_pfn()' name,
I wish it was called something like is_valid_pfn().
> +
> + /*
> + * If __kvm_follow_pfn() failed because I/O is needed to fault in the
> + * page, then either set up an asynchronous #PF to do the I/O, or if
> + * doing an async #PF isn't possible, retry __kvm_follow_pfn() with
> + * I/O allowed. All other failures are fatal, i.e. retrying won't help.
> + */
> + if (fault->pfn != KVM_PFN_ERR_NEEDS_IO)
> + return RET_PF_CONTINUE;
>
> if (!fault->prefetch && kvm_can_do_async_pf(vcpu)) {
> trace_kvm_try_async_get_page(fault->addr, fault->gfn);
> @@ -4306,9 +4321,17 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
> * to wait for IO. Note, gup always bails if it is unable to quickly
> * get a page and a fatal signal, i.e. SIGKILL, is pending.
> */
> - fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, true, NULL,
> - fault->write, &fault->map_writable,
> - &fault->hva);
> + foll.flags |= FOLL_INTERRUPTIBLE;
> + foll.flags &= ~FOLL_NOWAIT;
> + fault->pfn = __kvm_follow_pfn(&foll);
> +
> + if (!is_error_noslot_pfn(fault->pfn))
> + goto success;
> +
> + return RET_PF_CONTINUE;
> +success:
> + fault->hva = foll.hva;
> + fault->map_writable = foll.writable;
> return RET_PF_CONTINUE;
> }
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 6c9c81e82e65..2011a7e47296 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -8556,6 +8556,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
> {
> gpa_t gpa = cr2_or_gpa;
> kvm_pfn_t pfn;
> + struct kvm_follow_pfn foll;
>
> if (!(emulation_type & EMULTYPE_ALLOW_RETRY_PF))
> return false;
> @@ -8585,7 +8586,13 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
> * retry instruction -> write #PF -> emulation fail -> retry
> * instruction -> ...
> */
> - pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
> + foll = (struct kvm_follow_pfn) {
> + .slot = gfn_to_memslot(vcpu->kvm, gpa_to_gfn(gpa)),
> + .gfn = gpa_to_gfn(gpa),
> + .flags = FOLL_WRITE,
> + .allow_non_refcounted_struct_page = true,
> + };
> + pfn = __kvm_follow_pfn(&foll);
>
> /*
> * If the instruction failed on the error pfn, it can not be fixed,
> @@ -8594,7 +8601,8 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
> if (is_error_noslot_pfn(pfn))
> return false;
>
> - kvm_release_pfn_clean(pfn);
> + if (foll.is_refcounted_page)
> + kvm_release_page_clean(pfn_to_page(pfn));
>
> /* The instructions are well-emulated on direct mmu. */
> if (vcpu->arch.mmu->root_role.direct) {
Overall looks good, I might have missed something.
Reviewed-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx>
Best regards,
Maxim Levitsky