Re: [PATCH Part2 v5 25/45] KVM: SVM: Add KVM_SEV_SNP_LAUNCH_UPDATE command

From: Brijesh Singh
Date: Mon Sep 27 2021 - 15:33:18 EST




On 9/27/21 11:43 AM, Peter Gonda wrote:
...

+static bool is_hva_registered(struct kvm *kvm, hva_t hva, size_t len)
+{
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct list_head *head = &sev->regions_list;
+ struct enc_region *i;
+
+ lockdep_assert_held(&kvm->lock);
+
+ list_for_each_entry(i, head, list) {
+ u64 start = i->uaddr;
+ u64 end = start + i->size;
+
+ if (start <= hva && end >= (hva + len))
+ return true;
+ }
+
+ return false;
+}

Internally we actually register the guest memory in chunks for various
reasons. So for our largest SEV VM we have 768 1 GB entries in
|sev->regions_list|. This was OK before because no look ups were done.
Now that we are performing a look ups a linked list with linear time
lookups seems not ideal, could we switch the back data structure here
to something more conducive too fast lookups?
+

Interesting, for qemu we had very few number of regions so there was no strong reason for me to think something otherwise. Do you have any preference on what data structure you will use ?

+static int snp_launch_update(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct sev_data_snp_launch_update data = {0};
+ struct kvm_sev_snp_launch_update params;
+ unsigned long npages, pfn, n = 0;

Could we have a slightly more descriptive name for |n|? nprivate
maybe? Also why not zero in the loop below?


Sure, I will pick a better name and no need to zero above. I will fix it.

for (i = 0, n = 0; i < npages; ++i)

+ int *error = &argp->error;
+ struct page **inpages;
+ int ret, i, level;

Should |i| be an unsigned long since it can is tracked in a for loop
with "i < npages" npages being an unsigned long? (|n| too)


Noted.

+ u64 gfn;
+
+ if (!sev_snp_guest(kvm))
+ return -ENOTTY;
+
+ if (!sev->snp_context)
+ return -EINVAL;
+
+ if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
+ return -EFAULT;
+
+ /* Verify that the specified address range is registered. */
+ if (!is_hva_registered(kvm, params.uaddr, params.len))
+ return -EINVAL;
+
+ /*
+ * The userspace memory is already locked so technically we don't
+ * need to lock it again. Later part of the function needs to know
+ * pfn so call the sev_pin_memory() so that we can get the list of
+ * pages to iterate through.
+ */
+ inpages = sev_pin_memory(kvm, params.uaddr, params.len, &npages, 1);
+ if (!inpages)
+ return -ENOMEM;
+
+ /*
+ * Verify that all the pages are marked shared in the RMP table before
+ * going further. This is avoid the cases where the userspace may try

This is *too* avoid cases...

Noted

+ * updating the same page twice.
+ */
+ for (i = 0; i < npages; i++) {
+ if (snp_lookup_rmpentry(page_to_pfn(inpages[i]), &level) != 0) {
+ sev_unpin_memory(kvm, inpages, npages);
+ return -EFAULT;
+ }
+ }
+
+ gfn = params.start_gfn;
+ level = PG_LEVEL_4K;
+ data.gctx_paddr = __psp_pa(sev->snp_context);
+
+ for (i = 0; i < npages; i++) {
+ pfn = page_to_pfn(inpages[i]);
+
+ ret = rmp_make_private(pfn, gfn << PAGE_SHIFT, level, sev_get_asid(kvm), true);
+ if (ret) {
+ ret = -EFAULT;
+ goto e_unpin;
+ }
+
+ n++;
+ data.address = __sme_page_pa(inpages[i]);
+ data.page_size = X86_TO_RMP_PG_LEVEL(level);
+ data.page_type = params.page_type;
+ data.vmpl3_perms = params.vmpl3_perms;
+ data.vmpl2_perms = params.vmpl2_perms;
+ data.vmpl1_perms = params.vmpl1_perms;
+ ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_SNP_LAUNCH_UPDATE, &data, error);
+ if (ret) {
+ /*
+ * If the command failed then need to reclaim the page.
+ */
+ snp_page_reclaim(pfn);
+ goto e_unpin;
+ }

Hmm if this call fails after the first iteration of this loop it will
lead to a hard to reproduce LaunchDigest right? Say if we are
SnpLaunchUpdating just 2 pages A and B. If we first call this ioctl
and A is SNP_LAUNCH_UPDATED'd but B fails, we then make A shared again
in the RMP. So we must call the ioctl with 2 pages again, after fixing
the issue with page B. Now the Launch digest has something like
Hash(A) then HASH(A & B) right (overly simplified) so A will be
included twice right? I am not sure if anything better can be done
here but might be worth documenting IIUC.


I can add a comment in documentation that if a LAUNCH_UPDATE fails then user need to destroy the existing context and start from the beginning. I am not sure if we want to support the partial update cases. But in case we have two choices a) decommission the context on failure or b) add a new command to destroy the existing context.


+
+ gfn++;
+ }
+
+e_unpin:
+ /* Content of memory is updated, mark pages dirty */
+ for (i = 0; i < n; i++) {
+ set_page_dirty_lock(inpages[i]);
+ mark_page_accessed(inpages[i]);
+
+ /*
+ * If its an error, then update RMP entry to change page ownership
+ * to the hypervisor.
+ */
+ if (ret)
+ host_rmp_make_shared(pfn, level, true);
+ }
+
+ /* Unlock the user pages */
+ sev_unpin_memory(kvm, inpages, npages);
+
+ return ret;
+}
+
int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
{
struct kvm_sev_cmd sev_cmd;
@@ -1712,6 +1873,9 @@ int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
case KVM_SEV_SNP_LAUNCH_START:
r = snp_launch_start(kvm, &sev_cmd);
break;
+ case KVM_SEV_SNP_LAUNCH_UPDATE:
+ r = snp_launch_update(kvm, &sev_cmd);
+ break;
default:
r = -EINVAL;
goto out;
@@ -1794,6 +1958,29 @@ find_enc_region(struct kvm *kvm, struct kvm_enc_region *range)
static void __unregister_enc_region_locked(struct kvm *kvm,
struct enc_region *region)
{
+ unsigned long i, pfn;
+ int level;
+
+ /*
+ * The guest memory pages are assigned in the RMP table. Unassign it
+ * before releasing the memory.
+ */
+ if (sev_snp_guest(kvm)) {
+ for (i = 0; i < region->npages; i++) {
+ pfn = page_to_pfn(region->pages[i]);
+
+ if (!snp_lookup_rmpentry(pfn, &level))
+ continue;
+
+ cond_resched();
+
+ if (level > PG_LEVEL_4K)
+ pfn &= ~(KVM_PAGES_PER_HPAGE(PG_LEVEL_2M) - 1);
+
+ host_rmp_make_shared(pfn, level, true);
+ }
+ }
+
sev_unpin_memory(kvm, region->pages, region->npages);
list_del(&region->list);
kfree(region);
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index e6416e58cd9a..0681be4bdfdf 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1715,6 +1715,7 @@ enum sev_cmd_id {
/* SNP specific commands */
KVM_SEV_SNP_INIT,
KVM_SEV_SNP_LAUNCH_START,
+ KVM_SEV_SNP_LAUNCH_UPDATE,

KVM_SEV_NR_MAX,
};
@@ -1831,6 +1832,24 @@ struct kvm_sev_snp_launch_start {
__u8 pad[6];
};

+#define KVM_SEV_SNP_PAGE_TYPE_NORMAL 0x1
+#define KVM_SEV_SNP_PAGE_TYPE_VMSA 0x2
+#define KVM_SEV_SNP_PAGE_TYPE_ZERO 0x3
+#define KVM_SEV_SNP_PAGE_TYPE_UNMEASURED 0x4
+#define KVM_SEV_SNP_PAGE_TYPE_SECRETS 0x5
+#define KVM_SEV_SNP_PAGE_TYPE_CPUID 0x6
+
+struct kvm_sev_snp_launch_update {
+ __u64 start_gfn;
+ __u64 uaddr;
+ __u32 len;
+ __u8 imi_page;
+ __u8 page_type;
+ __u8 vmpl3_perms;
+ __u8 vmpl2_perms;
+ __u8 vmpl1_perms;
+};
+
#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1)
#define KVM_DEV_ASSIGN_MASK_INTX (1 << 2)
--
2.17.1