Re: [PATCH v2 2/9] KVM: arm64: Add a range to __pkvm_host_share_guest()
From: Vincent Donnefort
Date: Fri Apr 04 2025 - 12:48:08 EST
On Thu, Apr 03, 2025 at 03:27:15PM +0000, Quentin Perret wrote:
> On Thursday 06 Mar 2025 at 11:00:31 (+0000), Vincent Donnefort wrote:
> > +int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu *vcpu,
> > enum kvm_pgtable_prot prot)
> > {
> > struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
> > u64 phys = hyp_pfn_to_phys(pfn);
> > u64 ipa = hyp_pfn_to_phys(gfn);
> > + enum pkvm_page_state state;
> > struct hyp_page *page;
> > + u64 size;
> > int ret;
> >
> > if (prot & ~KVM_PGTABLE_PROT_RWX)
> > return -EINVAL;
> >
> > - ret = check_range_allowed_memory(phys, phys + PAGE_SIZE);
> > + ret = __guest_check_transition_size(phys, ipa, nr_pages, &size);
> > + if (ret)
> > + return ret;
> > +
> > + ret = check_range_allowed_memory(phys, phys + size);
> > if (ret)
> > return ret;
> >
> > host_lock_component();
> > guest_lock_component(vm);
> >
> > - ret = __guest_check_page_state_range(vcpu, ipa, PAGE_SIZE, PKVM_NOPAGE);
> > + ret = __guest_check_page_state_range(vm, ipa, size, PKVM_NOPAGE);
> > if (ret)
> > goto unlock;
> >
> > - page = hyp_phys_to_page(phys);
> > - switch (page->host_state) {
> > + state = hyp_phys_to_page(phys)->host_state;
> > + for_each_hyp_page(phys, size, page) {
> > + if (page->host_state != state) {
> > + ret = -EPERM;
> > + goto unlock;
> > + }
> > + }
> > +
> > + switch (state) {
> > case PKVM_PAGE_OWNED:
> > - WARN_ON(__host_set_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_SHARED_OWNED));
> > + WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED));
> > break;
> > case PKVM_PAGE_SHARED_OWNED:
> > - if (page->host_share_guest_count)
> > - break;
> > - /* Only host to np-guest multi-sharing is tolerated */
> > - WARN_ON(1);
> > - fallthrough;
> > + for_each_hyp_page(phys, size, page) {
> > + /* Only host to np-guest multi-sharing is tolerated */
> > + if (WARN_ON(!page->host_share_guest_count)) {
> > + ret = -EPERM;
> > + goto unlock;
> > + }
> > + }
> > + break;
> > default:
> > ret = -EPERM;
> > goto unlock;
> > }
> >
> > - WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, PAGE_SIZE, phys,
> > + WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, size, phys,
> > pkvm_mkstate(prot, PKVM_PAGE_SHARED_BORROWED),
> > &vcpu->vcpu.arch.pkvm_memcache, 0));
> > - page->host_share_guest_count++;
> > + __host_update_share_guest_count(phys, size, true);
>
> So we're walking the entire phys range 3 times:
>
> 1. to check that the host_state is consistent with that of the first
> page;
>
> 2. to set the state to SHARED_OWNED or to check the
> host_share_guest_count;
>
> 3. and then again here to update the host share guest count.
>
> I feel like we could probably remove at least one loop with a pattern
> like so:
>
> for_each_hyp_page(phys, size, page) {
> switch (page->host_state) {
> case PKVM_PAGE_OWNED:
> continue;
> case PKVM_PAGE_SHARED_OWNED:
> if (page->host_share_guest_count)
> continue;
> fallthrough;
> default:
> ret = -EPERM;
> goto unlock;
> }
> }
>
> for_each_hyp_page(phys, size, page) {
> page->host_state = PKVM_PAGE_SHARED_OWNED;
> page->host_share_guest_count++;
> }
>
> That would also tolerate a mix of OWNED and SHARED_OWNED pages in the
> range, which I'm not sure is needed, but it doesn't cost us anything to
> support so ... :-)
>
> Wdyt?
That sounds good. I'll also drop __host_update_share_guest_count at the same
time and fold it directly into the share/unshare functions.
>
> > unlock:
> > guest_unlock_component(vm);
> > diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
> > index 930b677eb9b0..00fd9a524bf7 100644
> > --- a/arch/arm64/kvm/pkvm.c
> > +++ b/arch/arm64/kvm/pkvm.c
> > @@ -361,7 +361,7 @@ int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
> > return -EINVAL;
> >
> > lockdep_assert_held_write(&kvm->mmu_lock);
> > - ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, prot);
> > + ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, 1, prot);
> > if (ret) {
> > /* Is the gfn already mapped due to a racing vCPU? */
> > if (ret == -EPERM)
> > --
> > 2.48.1.711.g2feabab25a-goog
> >