Re: [PATCH v2 11/18] KVM: arm64: Introduce __pkvm_host_unshare_guest()

From: Quentin Perret
Date: Tue Dec 10 2024 - 10:53:21 EST


On Tuesday 10 Dec 2024 at 14:41:12 (+0000), Fuad Tabba wrote:
> Hi Quentin,
>
> On Tue, 3 Dec 2024 at 10:38, Quentin Perret <qperret@xxxxxxxxxx> wrote:
> >
> > In preparation for letting the host unmap pages from non-protected
> > guests, introduce a new hypercall implementing the host-unshare-guest
> > transition.
> >
> > Signed-off-by: Quentin Perret <qperret@xxxxxxxxxx>
> > ---
> > arch/arm64/include/asm/kvm_asm.h | 1 +
> > arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 1 +
> > arch/arm64/kvm/hyp/include/nvhe/pkvm.h | 5 ++
> > arch/arm64/kvm/hyp/nvhe/hyp-main.c | 24 +++++++
> > arch/arm64/kvm/hyp/nvhe/mem_protect.c | 67 +++++++++++++++++++
> > 5 files changed, 98 insertions(+)
> >
> > diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
> > index 449337f5b2a3..0b6c4d325134 100644
> > --- a/arch/arm64/include/asm/kvm_asm.h
> > +++ b/arch/arm64/include/asm/kvm_asm.h
> > @@ -66,6 +66,7 @@ enum __kvm_host_smccc_func {
> > 	__KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp,
> > 	__KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_hyp,
> > 	__KVM_HOST_SMCCC_FUNC___pkvm_host_share_guest,
> > +	__KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_guest,
> > 	__KVM_HOST_SMCCC_FUNC___kvm_adjust_pc,
> > 	__KVM_HOST_SMCCC_FUNC___kvm_vcpu_run,
> > 	__KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context,
> > diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
> > index a7976e50f556..e528a42ed60e 100644
> > --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
> > +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
> > @@ -40,6 +40,7 @@ int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages);
> > int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages);
> > int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages);
> > int __pkvm_host_share_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot);
> > +int __pkvm_host_unshare_guest(u64 gfn, struct pkvm_hyp_vm *hyp_vm);
>
> The parameters of share_guest and unshare_guest are quite different. I
> think the unshare one makes more sense, in that it uses the hyp_vm as
> opposed to the hyp_vcpu. Still, I think one of the two should change.

Hmm, so that is actually a bit difficult. __pkvm_host_share_guest() is
guaranteed to be called with a vCPU loaded, and it needs to use the
per-vCPU memcache, so we can't just give it the pkvm_hyp_vm as-is.

__pkvm_host_unshare_guest(), on the other hand, can end up being called
from an MMU notifier where no vCPU is loaded, so it's not clear which
vCPU it should be using. We also just don't need to access per-vCPU
data structures on that path: the unmap call can only free page-table
pages, and those are always put back into the per-guest pool directly
rather than into a memcache.
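
For what it's worth, here is a rough sketch of what a host-side caller
on the unmap path could look like. The helper below is purely
hypothetical and not part of this series; the (handle, gfn) arguments
match the hypercall handler above, and kvm_call_hyp_nvhe() plus
kvm->arch.pkvm.handle are existing KVM/arm64 facilities:

/*
 * Hypothetical host-side helper, for illustration only: it runs on an
 * unmap/MMU-notifier path where no vCPU is loaded, so all it can pass
 * down to the hypervisor is the VM handle and the gfn.
 */
static int host_unshare_guest_range(struct kvm *kvm, u64 gfn, u64 nr_pages)
{
	u64 end = gfn + nr_pages;
	int ret;

	for (; gfn < end; gfn++) {
		ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest,
					kvm->arch.pkvm.handle, gfn);
		if (ret)
			return ret;
	}

	return 0;
}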

> > bool addr_is_memory(phys_addr_t phys);
> > int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot);
> > diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
> > index be52c5b15e21..5dfc9ece9aa5 100644
> > --- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
> > +++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
> > @@ -64,6 +64,11 @@ static inline bool pkvm_hyp_vcpu_is_protected(struct pkvm_hyp_vcpu *hyp_vcpu)
> > 	return vcpu_is_protected(&hyp_vcpu->vcpu);
> > }
> >
> > +static inline bool pkvm_hyp_vm_is_protected(struct pkvm_hyp_vm *hyp_vm)
> > +{
> > +	return kvm_vm_is_protected(&hyp_vm->kvm);
> > +}
> > +
> > void pkvm_hyp_vm_table_init(void *tbl);
> >
> > int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
> > diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> > index d659462fbf5d..04a9053ae1d5 100644
> > --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> > +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> > @@ -244,6 +244,29 @@ static void handle___pkvm_host_share_guest(struct kvm_cpu_context *host_ctxt)
> > 	cpu_reg(host_ctxt, 1) = ret;
> > }
> >
> > +static void handle___pkvm_host_unshare_guest(struct kvm_cpu_context *host_ctxt)
> > +{
> > +	DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
> > +	DECLARE_REG(u64, gfn, host_ctxt, 2);
> > +	struct pkvm_hyp_vm *hyp_vm;
> > +	int ret = -EINVAL;
> > +
> > +	if (!is_protected_kvm_enabled())
> > +		goto out;
> > +
> > +	hyp_vm = get_pkvm_hyp_vm(handle);
> > +	if (!hyp_vm)
> > +		goto out;
> > +	if (pkvm_hyp_vm_is_protected(hyp_vm))
> > +		goto put_hyp_vm;
>
> bikeshedding: is -EINVAL the best return value, or might -EPERM be
> better if the VM is protected?

-EINVAL makes the code marginally simpler, especially given that we have
this pattern all across hyp-main.c, so I have a minor personal
preference for keeping it as-is, but no strong opinion really. This path
really shouldn't ever be hit at run-time, modulo major bugs or a
malicious host, so it's probably not a huge deal if -EINVAL isn't
particularly accurate.

> > +
> > +	ret = __pkvm_host_unshare_guest(gfn, hyp_vm);
> > +put_hyp_vm:
> > +	put_pkvm_hyp_vm(hyp_vm);
> > +out:
> > +	cpu_reg(host_ctxt, 1) = ret;
> > +}
> > +
> > static void handle___kvm_adjust_pc(struct kvm_cpu_context *host_ctxt)
> > {
> > 	DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1);
> > @@ -454,6 +477,7 @@ static const hcall_t host_hcall[] = {
> > 	HANDLE_FUNC(__pkvm_host_share_hyp),
> > 	HANDLE_FUNC(__pkvm_host_unshare_hyp),
> > 	HANDLE_FUNC(__pkvm_host_share_guest),
> > +	HANDLE_FUNC(__pkvm_host_unshare_guest),
> > 	HANDLE_FUNC(__kvm_adjust_pc),
> > 	HANDLE_FUNC(__kvm_vcpu_run),
> > 	HANDLE_FUNC(__kvm_flush_vm_context),
> > diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> > index a69d7212b64c..aa27a3e42e5e 100644
> > --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> > +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> > @@ -1413,3 +1413,70 @@ int __pkvm_host_share_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu,
> >
> > 	return ret;
> > }
> > +
> > +static int __check_host_unshare_guest(struct pkvm_hyp_vm *vm, u64 *__phys, u64 ipa)
>
> nit: sometimes (in this and other patches) you use vm to refer to a
> pkvm_hyp_vm, and other times you use hyp_vm, which makes
> grepping/searching a bit more tricky.

Ack, I'll do a pass on the series to improve the consistency.

> > +{
> > +	enum pkvm_page_state state;
> > +	struct hyp_page *page;
> > +	kvm_pte_t pte;
> > +	u64 phys;
> > +	s8 level;
> > +	int ret;
> > +
> > +	ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
> > +	if (ret)
> > +		return ret;
> > +	if (level != KVM_PGTABLE_LAST_LEVEL)
> > +		return -E2BIG;
> > +	if (!kvm_pte_valid(pte))
> > +		return -ENOENT;
> > +
> > +	state = guest_get_page_state(pte, ipa);
> > +	if (state != PKVM_PAGE_SHARED_BORROWED)
> > +		return -EPERM;
> > +
> > +	phys = kvm_pte_to_phys(pte);
> > +	ret = range_is_allowed_memory(phys, phys + PAGE_SIZE);
> > +	if (WARN_ON(ret))
> > +		return ret;
> > +
> > +	page = hyp_phys_to_page(phys);
> > +	if (page->host_state != PKVM_PAGE_SHARED_OWNED)
> > +		return -EPERM;
> > +	if (WARN_ON(!page->host_share_guest_count))
> > +		return -EINVAL;
> > +
> > +	*__phys = phys;
> > +
> > +	return 0;
> > +}
> > +
> > +int __pkvm_host_unshare_guest(u64 gfn, struct pkvm_hyp_vm *hyp_vm)
> > +{
> > +	u64 ipa = hyp_pfn_to_phys(gfn);
> > +	struct hyp_page *page;
> > +	u64 phys;
> > +	int ret;
> > +
> > +	host_lock_component();
> > +	guest_lock_component(hyp_vm);
> > +
> > +	ret = __check_host_unshare_guest(hyp_vm, &phys, ipa);
> > +	if (ret)
> > +		goto unlock;
> > +
> > +	ret = kvm_pgtable_stage2_unmap(&hyp_vm->pgt, ipa, PAGE_SIZE);
> > +	if (ret)
> > +		goto unlock;
> > +
> > +	page = hyp_phys_to_page(phys);
> > +	page->host_share_guest_count--;
> > +	if (!page->host_share_guest_count)
> > +		WARN_ON(__host_set_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_OWNED));
> > +
> > +unlock:
> > +	guest_unlock_component(hyp_vm);
> > +	host_unlock_component();
> > +
> > +	return ret;
> > +}
> > --
> > 2.47.0.338.g60cca15819-goog
> >