Re: [PATCH 09/11] KVM: guest_memfd: Add interface for populating gmem pages with user data

From: Paolo Bonzini
Date: Fri Apr 26 2024 - 01:45:15 EST


On Thu, Apr 25, 2024 at 6:51 PM Isaku Yamahata <isaku.yamahata@xxxxxxxxx> wrote:
> > AFAIK, unwinding on failure is completely uninteresting, and arguably undesirable,
> > because undoing LAUNCH_UPDATE or PAGE.ADD will affect the measurement, i.e. there
> > is no scenario where deleting pages from guest_memfd would allow a restart/resume
> > of the build process to truly succeed.
>
>
> Just for the record: with the following tweak to kvm_gmem_populate(),
> KVM_TDX_INIT_MEM_REGION can use kvm_gmem_populate(). For those who are curious,
> I have also appended the callback implementation at the end.

Nice, thank you very much. Since TDX does not need
HAVE_KVM_GMEM_PREPARE, if I get rid of FGP_CREAT_ONLY it will work for
you, right?

Paolo

>
> --
>
> include/linux/kvm_host.h | 2 ++
> virt/kvm/guest_memfd.c | 3 ++-
> 2 files changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index df957c9f9115..7c86b77f8895 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -2460,6 +2460,7 @@ bool kvm_arch_gmem_prepare_needed(struct kvm *kvm);
> * (passed to @post_populate, and incremented on each iteration
> * if not NULL)
> * @npages: number of pages to copy from userspace-buffer
> + * @prepare: Allow page allocation to invoke gmem_prepare hook
> * @post_populate: callback to issue for each gmem page that backs the GPA
> * range
> * @opaque: opaque data to pass to @post_populate callback
> @@ -2473,6 +2474,7 @@ bool kvm_arch_gmem_prepare_needed(struct kvm *kvm);
> * Returns the number of pages that were populated.
> */
> long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages,
> + bool prepare,
> int (*post_populate)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
> void __user *src, int order, void *opaque),
> void *opaque);
> diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> index 3195ceefe915..18809e6dea8a 100644
> --- a/virt/kvm/guest_memfd.c
> +++ b/virt/kvm/guest_memfd.c
> @@ -638,6 +638,7 @@ static int kvm_gmem_undo_get_pfn(struct file *file, struct kvm_memory_slot *slot
> }
>
> long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages,
> + bool prepare,
> int (*post_populate)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
> void __user *src, int order, void *opaque),
> void *opaque)
> @@ -667,7 +668,7 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages
> gfn_t this_gfn = gfn + i;
> kvm_pfn_t pfn;
>
> - ret = __kvm_gmem_get_pfn(file, slot, this_gfn, &pfn, &max_order, false);
> + ret = __kvm_gmem_get_pfn(file, slot, this_gfn, &pfn, &max_order, prepare);
> if (ret)
> break;
>
> --
> 2.43.2
>
>
> Here is the callback for KVM_TDX_INIT_MEM_REGION.
> Note: the caller of kvm_gmem_populate() holds kvm->slots_lock (taken with
> mutex_lock(&kvm->slots_lock)) and is inside an SRCU read-side critical
> section (idx = srcu_read_lock(&kvm->srcu)).
>
>
> struct tdx_gmem_post_populate_arg {
> struct kvm_vcpu *vcpu;
> __u32 flags;
> };
>
> static int tdx_gmem_post_populate(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
> void __user *src, int order, void *_arg)
> {
> struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
> struct tdx_gmem_post_populate_arg *arg = _arg;
> struct kvm_vcpu *vcpu = arg->vcpu;
> struct kvm_memory_slot *slot;
> gpa_t gpa = gfn_to_gpa(gfn);
> struct page *page;
> kvm_pfn_t mmu_pfn;
> int ret, i;
> u64 err;
>
> /* Pin the source page. */
> ret = get_user_pages_fast((unsigned long)src, 1, 0, &page);
> if (ret < 0)
> return ret;
> if (ret != 1)
> return -ENOMEM;
>
> slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
> if (!kvm_slot_can_be_private(slot) || !kvm_mem_is_private(kvm, gfn)) {
> ret = -EFAULT;
> goto out_put_page;
> }
>
> read_lock(&kvm->mmu_lock);
>
> ret = kvm_tdp_mmu_get_walk_private_pfn(vcpu, gpa, &mmu_pfn);
> if (ret < 0)
> goto out;
> if (ret > PG_LEVEL_4K) {
> ret = -EINVAL;
> goto out;
> }
> if (mmu_pfn != pfn) {
> ret = -EAGAIN;
> goto out;
> }
>
> ret = 0;
> do {
> err = tdh_mem_page_add(kvm_tdx, gpa, pfn_to_hpa(pfn),
> pfn_to_hpa(page_to_pfn(page)), NULL);
> } while (err == TDX_ERROR_SEPT_BUSY);
> if (err) {
> ret = -EIO;
> goto out;
> }
>
> WARN_ON_ONCE(!atomic64_read(&kvm_tdx->nr_premapped));
> atomic64_dec(&kvm_tdx->nr_premapped);
> tdx_account_td_pages(vcpu->kvm, PG_LEVEL_4K);
>
> if (arg->flags & KVM_TDX_MEASURE_MEMORY_REGION) {
> for (i = 0; i < PAGE_SIZE; i += TDX_EXTENDMR_CHUNKSIZE) {
> err = tdh_mr_extend(kvm_tdx, gpa + i, NULL);
> if (err) {
> ret = -EIO;
> break;
> }
> }
> }
>
> out:
> read_unlock(&kvm->mmu_lock);
> out_put_page:
> put_page(page);
> return ret;
> }
>
> --
> Isaku Yamahata <isaku.yamahata@xxxxxxxxx>
>