Re: [PATCH v13 10/26] KVM: SEV: Add KVM_SEV_SNP_LAUNCH_UPDATE command

From: Paolo Bonzini
Date: Fri Apr 19 2024 - 07:57:02 EST


On Thu, Apr 18, 2024 at 9:42 PM Michael Roth <michael.roth@xxxxxxx> wrote:
>
> From: Brijesh Singh <brijesh.singh@xxxxxxx>
>
> A key aspect of launching an SNP guest is initializing it with a
> known/measured payload which is then encrypted into guest memory as
> pre-validated private pages and then measured into the cryptographic
> launch context created with KVM_SEV_SNP_LAUNCH_START so that the guest
> can attest itself after booting.
>
> Since all private pages are provided by guest_memfd, make use of the
> kvm_gmem_populate() interface to handle this. The general flow is that
> guest_memfd will handle allocating the pages associated with the GPA
> ranges being initialized by each particular call of
> KVM_SEV_SNP_LAUNCH_UPDATE, copying data from userspace into those pages,
> and then the post_populate callback will do the work of setting the
> RMP entries for these pages to private and issuing the SNP firmware
> calls to encrypt/measure them.
>
> For more information see the SEV-SNP specification.
>
> Signed-off-by: Brijesh Singh <brijesh.singh@xxxxxxx>
> Co-developed-by: Michael Roth <michael.roth@xxxxxxx>
> Signed-off-by: Michael Roth <michael.roth@xxxxxxx>
> Signed-off-by: Ashish Kalra <ashish.kalra@xxxxxxx>
> ---
> .../virt/kvm/x86/amd-memory-encryption.rst | 39 ++++
> arch/x86/include/uapi/asm/kvm.h | 15 ++
> arch/x86/kvm/svm/sev.c | 218 ++++++++++++++++++
> 3 files changed, 272 insertions(+)
>
> diff --git a/Documentation/virt/kvm/x86/amd-memory-encryption.rst b/Documentation/virt/kvm/x86/amd-memory-encryption.rst
> index 1b042f827eab..1ee8401de72d 100644
> --- a/Documentation/virt/kvm/x86/amd-memory-encryption.rst
> +++ b/Documentation/virt/kvm/x86/amd-memory-encryption.rst
> @@ -478,6 +478,45 @@ Returns: 0 on success, -negative on error
>
> See the SEV-SNP spec [snp-fw-abi]_ for further detail on the launch input.
>
> +19. KVM_SEV_SNP_LAUNCH_UPDATE
> +-----------------------------
> +
> +The KVM_SEV_SNP_LAUNCH_UPDATE command is used for loading userspace-provided
> +data into a guest GPA range, measuring the contents into the SNP guest context
> +created by KVM_SEV_SNP_LAUNCH_START, and then encrypting/validating that GPA
> +range so that it will be immediately readable using the encryption key
> +associated with the guest context once it is booted, after which point it can
> +attest the measurement associated with its context before unlocking any
> +secrets.
> +
> +It is required that the GPA ranges initialized by this command have had the
> +KVM_MEMORY_ATTRIBUTE_PRIVATE attribute set in advance. See the documentation
> +for KVM_SET_MEMORY_ATTRIBUTES for more details on this aspect.
> +
> +Parameters (in): struct kvm_sev_snp_launch_update
> +
> +Returns: 0 on success, -negative on error
> +
> +::
> +
> + struct kvm_sev_snp_launch_update {
> + __u64 gfn_start; /* Guest page number to load/encrypt data into. */
> + __u64 uaddr; /* Userspace address of data to be loaded/encrypted. */
> + __u32 len; /* 4k-aligned length in bytes to copy into guest memory.*/
> + __u8 type; /* The type of the guest pages being initialized. */
> + };
> +
> +where the allowed values for type are #define'd as::
> +
> + KVM_SEV_SNP_PAGE_TYPE_NORMAL
> + KVM_SEV_SNP_PAGE_TYPE_ZERO
> + KVM_SEV_SNP_PAGE_TYPE_UNMEASURED
> + KVM_SEV_SNP_PAGE_TYPE_SECRETS
> + KVM_SEV_SNP_PAGE_TYPE_CPUID
> +
> +See the SEV-SNP spec [snp-fw-abi]_ for further details on how each page type is
> +used/measured.
> +
> Device attribute API
> ====================
>
> diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
> index bdf8c5461a36..8612aec97f55 100644
> --- a/arch/x86/include/uapi/asm/kvm.h
> +++ b/arch/x86/include/uapi/asm/kvm.h
> @@ -699,6 +699,7 @@ enum sev_cmd_id {
>
> /* SNP-specific commands */
> KVM_SEV_SNP_LAUNCH_START = 100,
> + KVM_SEV_SNP_LAUNCH_UPDATE,
>
> KVM_SEV_NR_MAX,
> };
> @@ -830,6 +831,20 @@ struct kvm_sev_snp_launch_start {
> __u8 gosvw[16];
> };
>
> +/* Kept in sync with firmware values for simplicity. */
> +#define KVM_SEV_SNP_PAGE_TYPE_NORMAL 0x1
> +#define KVM_SEV_SNP_PAGE_TYPE_ZERO 0x3
> +#define KVM_SEV_SNP_PAGE_TYPE_UNMEASURED 0x4
> +#define KVM_SEV_SNP_PAGE_TYPE_SECRETS 0x5
> +#define KVM_SEV_SNP_PAGE_TYPE_CPUID 0x6
> +
> +struct kvm_sev_snp_launch_update {
> + __u64 gfn_start;
> + __u64 uaddr;
> + __u32 len;
> + __u8 type;
> +};
> +
> #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0)
> #define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1)
>
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index 4c5abc0e7806..e721152bae00 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -262,6 +262,35 @@ static void sev_decommission(unsigned int handle)
> sev_guest_decommission(&decommission, NULL);
> }
>
> +static int snp_page_reclaim(u64 pfn)
> +{
> + struct sev_data_snp_page_reclaim data = {0};
> + int err, rc;
> +
> + data.paddr = __sme_set(pfn << PAGE_SHIFT);
> + rc = sev_do_cmd(SEV_CMD_SNP_PAGE_RECLAIM, &data, &err);
> + if (WARN_ON_ONCE(rc)) {
> + /*
> + * This shouldn't happen under normal circumstances, but if the
> + * reclaim failed, then the page is no longer safe to use.
> + */
> + snp_leak_pages(pfn, 1);
> + }
> +
> + return rc;
> +}
> +
> +static int host_rmp_make_shared(u64 pfn, enum pg_level level)
> +{
> + int rc;
> +
> + rc = rmp_make_shared(pfn, level);
> + if (rc)
> + snp_leak_pages(pfn, page_level_size(level) >> PAGE_SHIFT);
> +
> + return rc;
> +}
> +
> static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
> {
> struct sev_data_deactivate deactivate;
> @@ -2131,6 +2160,192 @@ static int snp_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
> return rc;
> }
>
> +struct sev_gmem_populate_args {
> + __u8 type;
> + int sev_fd;
> + int fw_error;
> +};
> +
> +static int sev_gmem_post_populate(struct kvm *kvm, gfn_t gfn_start, kvm_pfn_t pfn,
> + void __user *src, int order, void *opaque)
> +{
> + struct sev_gmem_populate_args *sev_populate_args = opaque;
> + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> + int n_private = 0, ret, i;
> + int npages = (1 << order);
> + gfn_t gfn;
> +
> + pr_debug("%s: gfn_start %llx pfn_start %llx npages %d\n",
> + __func__, gfn_start, pfn, npages);
> +
> + for (gfn = gfn_start, i = 0; gfn < gfn_start + npages; gfn++, i++) {
> + struct sev_data_snp_launch_update fw_args = {0};
> + bool assigned;
> + void *vaddr;
> + int level;
> +
> + if (!kvm_mem_is_private(kvm, gfn)) {
> + pr_debug("%s: Failed to ensure GFN 0x%llx has private memory attribute set\n",
> + __func__, gfn);
> + ret = -EINVAL;
> + break;
> + }
> +
> + ret = snp_lookup_rmpentry((u64)pfn + i, &assigned, &level);
> + if (ret || assigned) {
> + pr_debug("%s: Failed to ensure GFN 0x%llx RMP entry is initial shared state, ret: %d assigned: %d\n",
> + __func__, gfn, ret, assigned);
> + ret = -EINVAL;
> + break;
> + }
> +
> + vaddr = kmap_local_pfn(pfn + i);
> + ret = copy_from_user(vaddr, src + i * PAGE_SIZE, PAGE_SIZE);
> + if (ret) {
> + pr_debug("Failed to copy source page into GFN 0x%llx\n", gfn);
> + goto out_unmap;
> + }
> +
> + ret = rmp_make_private(pfn + i, gfn << PAGE_SHIFT, PG_LEVEL_4K,
> + sev_get_asid(kvm), true);
> + if (ret) {
> + pr_debug("%s: Failed to convert GFN 0x%llx to private, ret: %d\n",
> + __func__, gfn, ret);
> + goto out_unmap;
> + }
> +
> + n_private++;
> +
> + fw_args.gctx_paddr = __psp_pa(sev->snp_context);
> + fw_args.address = __sme_set(pfn_to_hpa(pfn + i));
> + fw_args.page_size = PG_LEVEL_TO_RMP(PG_LEVEL_4K);
> + fw_args.page_type = sev_populate_args->type;
> + ret = __sev_issue_cmd(sev_populate_args->sev_fd, SEV_CMD_SNP_LAUNCH_UPDATE,
> + &fw_args, &sev_populate_args->fw_error);
> + if (ret) {
> + pr_debug("%s: SEV-SNP launch update failed, ret: 0x%x, fw_error: 0x%x\n",
> + __func__, ret, sev_populate_args->fw_error);
> +
> + if (snp_page_reclaim(pfn + i))
> + goto out_unmap;
> +
> + /*
> + * When invalid CPUID function entries are detected,
> + * firmware writes the expected values into the page and
> + * leaves it unencrypted so it can be used for debugging
> + * and error-reporting.
> + *
> + * Copy this page back into the source buffer so
> + * userspace can use this information to provide
> + * information on which CPUID leaves/fields failed CPUID
> + * validation.
> + */
> + if (sev_populate_args->type == KVM_SEV_SNP_PAGE_TYPE_CPUID &&
> + sev_populate_args->fw_error == SEV_RET_INVALID_PARAM) {
> + host_rmp_make_shared(pfn + i, PG_LEVEL_4K);
> +
> + if (copy_to_user(src + i * PAGE_SIZE,
> + vaddr, PAGE_SIZE))
> + pr_debug("Failed to write CPUID page back to userspace\n");
> + }
> + }
> +
> +out_unmap:
> + kunmap_local(vaddr);
> + if (ret)
> + break;
> + }
> +
> + if (ret) {
> + pr_debug("%s: exiting with error ret %d, undoing %d populated gmem pages.\n",
> + __func__, ret, n_private);
> + for (i = 0; i < n_private; i++)
> + host_rmp_make_shared(pfn + i, PG_LEVEL_4K);
> + }
> +
> + return ret;
> +}
> +
> +static int snp_launch_update(struct kvm *kvm, struct kvm_sev_cmd *argp)
> +{
> + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> + struct sev_gmem_populate_args sev_populate_args = {0};
> + struct kvm_sev_snp_launch_update params;
> + struct kvm_memory_slot *memslot;
> + unsigned int npages;
> + int ret = 0;
> +
> + if (!sev_snp_guest(kvm) || !sev->snp_context)
> + return -EINVAL;
> +
> + if (copy_from_user(&params, u64_to_user_ptr(argp->data), sizeof(params)))
> + return -EFAULT;
> +
> + if (!IS_ALIGNED(params.len, PAGE_SIZE) ||
> + (params.type != KVM_SEV_SNP_PAGE_TYPE_NORMAL &&
> + params.type != KVM_SEV_SNP_PAGE_TYPE_ZERO &&
> + params.type != KVM_SEV_SNP_PAGE_TYPE_UNMEASURED &&
> + params.type != KVM_SEV_SNP_PAGE_TYPE_SECRETS &&
> + params.type != KVM_SEV_SNP_PAGE_TYPE_CPUID))
> + return -EINVAL;
> +
> + npages = params.len / PAGE_SIZE;
> +
> + pr_debug("%s: GFN range 0x%llx-0x%llx type %d\n", __func__,
> + params.gfn_start, params.gfn_start + npages, params.type);
> +
> + /*
> + * For each GFN that's being prepared as part of the initial guest
> + * state, the following pre-conditions are verified:
> + *
> + * 1) The backing memslot is a valid private memslot.
> + * 2) The GFN has been set to private via KVM_SET_MEMORY_ATTRIBUTES
> + * beforehand.
> + * 3) The PFN of the guest_memfd has not already been set to private
> + * in the RMP table.
> + *
> + * The KVM MMU relies on kvm->mmu_invalidate_seq to retry nested page
> + * faults if there's a race between a fault and an attribute update via
> + * KVM_SET_MEMORY_ATTRIBUTES, and a similar approach could be utilized
> + * here. However, kvm->slots_lock guards against both this as well as
> + * concurrent memslot updates occurring while these checks are being
> + * performed, so use that here to make it easier to reason about the
> + * initial expected state and better guard against unexpected
> + * situations.
> + */
> + mutex_lock(&kvm->slots_lock);
> +
> + memslot = gfn_to_memslot(kvm, params.gfn_start);
> + if (!kvm_slot_can_be_private(memslot)) {
> + ret = -EINVAL;
> + goto out;
> + }
> +
> + sev_populate_args.sev_fd = argp->sev_fd;
> + sev_populate_args.type = params.type;
> +
> + ret = kvm_gmem_populate(kvm, params.gfn_start, u64_to_user_ptr(params.uaddr),
> + npages, sev_gmem_post_populate, &sev_populate_args);
> + if (ret < 0) {
> + argp->error = sev_populate_args.fw_error;
> + pr_debug("%s: kvm_gmem_populate failed, ret %d (fw_error %d)\n",
> + __func__, ret, argp->error);
> + } else if (ret < npages) {
> + params.len = ret * PAGE_SIZE;
> + ret = -EINTR;

This should probably 1) also update gfn_start and uaddr, and 2) return 0,
for consistency with the planned KVM_PRE_FAULT_MEMORY ioctl (aka
KVM_MAP_MEMORY).

Paolo