Re: [PATCH 3/3] KVM, SEV: Add support for SEV-ES local migration
From: Marc Orr
Date: Tue Jul 13 2021 - 19:26:15 EST
On Mon, Jun 21, 2021 at 9:59 AM Peter Gonda <pgonda@xxxxxxxxxx> wrote:
>
> Local migration provides a low-cost mechanism for userspace VMM upgrades.
> It is an alternative to traditional (i.e., remote) live migration. Whereas
> remote migration handles moving a guest to a new host, local migration only
> handles moving a guest to a new userspace VMM within a host.
>
> For SEV-ES to work with local migration, the VMSAs, GHCB metadata,
> and other SEV-ES info need to be preserved along with the guest's
> memory. KVM maintains a pointer to each vCPU's GHCB and may additionally
> hold a copy of the GHCB's save area if the guest has been using it
> for NAE handling. The local send and receive ioctls have been updated to
> move this additional per-vCPU SEV-ES metadata into the hashmap used
> for SEV local migration data.
>
> Signed-off-by: Peter Gonda <pgonda@xxxxxxxxxx>
> Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> Cc: Sean Christopherson <seanjc@xxxxxxxxxx>
> Cc: David Rientjes <rientjes@xxxxxxxxxx>
> Cc: Dr. David Alan Gilbert <dgilbert@xxxxxxxxxx>
> Cc: Brijesh Singh <brijesh.singh@xxxxxxx>
> Cc: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx>
> Cc: Wanpeng Li <wanpengli@xxxxxxxxxxx>
> Cc: Jim Mattson <jmattson@xxxxxxxxxx>
> Cc: Joerg Roedel <joro@xxxxxxxxxx>
> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> Cc: Ingo Molnar <mingo@xxxxxxxxxx>
> Cc: Borislav Petkov <bp@xxxxxxxxx>
> Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
> Cc: kvm@xxxxxxxxxxxxxxx
> Cc: linux-kernel@xxxxxxxxxxxxxxx
>
> ---
> arch/x86/kvm/svm/sev.c | 164 +++++++++++++++++++++++++++++++++++++----
> 1 file changed, 150 insertions(+), 14 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index 7c33ad2b910d..33df7ed08d21 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -77,6 +77,19 @@ struct enc_region {
> unsigned long size;
> };
>
> +struct vmsa_node {
> + struct list_head list;
> + int vcpu_id;
> + struct vmcb_save_area *vmsa;
> + struct ghcb *ghcb;
> + u64 ghcb_gpa;
> +
> + void *ghcb_sa;
> + u64 ghcb_sa_len;
> + bool ghcb_sa_sync;
> + bool ghcb_sa_free;
> +};
> +
> struct sev_info_migration_node {
> struct hlist_node hnode;
> u64 token;
> @@ -87,6 +100,11 @@ struct sev_info_migration_node {
> unsigned long pages_locked;
> struct list_head regions_list;
> struct misc_cg *misc_cg;
> +
> + /* The following fields are for SEV-ES guests */
> + bool es_enabled;
> + struct list_head vmsa_list;
> + u64 ap_jump_table;
> };
>
> #define SEV_INFO_MIGRATION_HASH_BITS 7
> @@ -1163,6 +1181,94 @@ static int place_migration_node(struct sev_info_migration_node *entry)
> return ret;
> }
>
> +static int process_vmsa_list(struct kvm *kvm, struct list_head *vmsa_list)
> +{
> + struct vmsa_node *vmsa_node, *q;
> + struct kvm_vcpu *vcpu;
> + struct vcpu_svm *svm;
> +
> + lockdep_assert_held(&kvm->lock);
> +
> + if (!vmsa_list)
> + return 0;
> +
> + list_for_each_entry(vmsa_node, vmsa_list, list) {
> + if (!kvm_get_vcpu_by_id(kvm, vmsa_node->vcpu_id)) {
> + WARN(1,
> + "Failed to find VCPU with ID %d despite presence in VMSA list.\n",
> + vmsa_node->vcpu_id);
> + return -1;
> + }
> + }
> +
> + /*
> + * Move any stashed VMSAs back to their respective VMCBs and delete
> + * those nodes.
> + */
> + list_for_each_entry_safe(vmsa_node, q, vmsa_list, list) {
> + vcpu = kvm_get_vcpu_by_id(kvm, vmsa_node->vcpu_id);
> + svm = to_svm(vcpu);
> + svm->vmsa = vmsa_node->vmsa;
> + svm->ghcb = vmsa_node->ghcb;
> + svm->vmcb->control.ghcb_gpa = vmsa_node->ghcb_gpa;
> + svm->vcpu.arch.guest_state_protected = true;
> + svm->vmcb->control.vmsa_pa = __pa(svm->vmsa);
> + svm->ghcb_sa = vmsa_node->ghcb_sa;
> + svm->ghcb_sa_len = vmsa_node->ghcb_sa_len;
> + svm->ghcb_sa_sync = vmsa_node->ghcb_sa_sync;
> + svm->ghcb_sa_free = vmsa_node->ghcb_sa_free;
> +
> + list_del(&vmsa_node->list);
> + kfree(vmsa_node);
> + }
> +
> + return 0;
> +}
> +
> +static int create_vmsa_list(struct kvm *kvm,
> + struct sev_info_migration_node *entry)
> +{
> + int i;
> + const int num_vcpus = atomic_read(&kvm->online_vcpus);
> + struct vmsa_node *node;
> + struct kvm_vcpu *vcpu;
> + struct vcpu_svm *svm;
> +
> + INIT_LIST_HEAD(&entry->vmsa_list);
> + for (i = 0; i < num_vcpus; ++i) {
> + node = kzalloc(sizeof(*node), GFP_KERNEL);
> + if (!node)
> + goto e_freelist;
> +
> + vcpu = kvm->vcpus[i];
> + node->vcpu_id = vcpu->vcpu_id;
> +
> + svm = to_svm(vcpu);
> + node->vmsa = svm->vmsa;
> + svm->vmsa = NULL;
> + node->ghcb = svm->ghcb;
> + svm->ghcb = NULL;
> + node->ghcb_gpa = svm->vmcb->control.ghcb_gpa;
> + node->ghcb_sa = svm->ghcb_sa;
> + svm->ghcb_sa = NULL;
> + node->ghcb_sa_len = svm->ghcb_sa_len;
> + svm->ghcb_sa_len = 0;
> + node->ghcb_sa_sync = svm->ghcb_sa_sync;
> + svm->ghcb_sa_sync = false;
> + node->ghcb_sa_free = svm->ghcb_sa_free;
> + svm->ghcb_sa_free = false;
> +
> + list_add_tail(&node->list, &entry->vmsa_list);
> + }
> +
> + return 0;
> +
> +e_freelist:
> + if (process_vmsa_list(kvm, &entry->vmsa_list))
> + WARN(1, "Unable to move VMSA list back to source VM. Guest is in a broken state now.");
> + return -1;
> +}
> +
> static int sev_local_send(struct kvm *kvm, struct kvm_sev_cmd *argp)
> {
> struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> @@ -1174,9 +1280,6 @@ static int sev_local_send(struct kvm *kvm, struct kvm_sev_cmd *argp)
> if (!sev_guest(kvm))
> return -ENOTTY;
>
> - if (sev->es_active)
> - return -EPERM;
> -
> if (sev->info_token != 0)
> return -EEXIST;
>
> @@ -1196,8 +1299,19 @@ static int sev_local_send(struct kvm *kvm, struct kvm_sev_cmd *argp)
> INIT_LIST_HEAD(&entry->regions_list);
> list_replace_init(&sev->regions_list, &entry->regions_list);
>
> + if (sev_es_guest(kvm)) {
> + /*
> + * If this is an ES guest, we need to move each VMCB's VMSA into a
> + * list for migration.
> + */
> + entry->es_enabled = true;
> + entry->ap_jump_table = sev->ap_jump_table;
> + if (create_vmsa_list(kvm, entry))
> + goto e_listdel;
> + }
> +
> if (place_migration_node(entry))
> - goto e_listdel;
> + goto e_vmsadel;
>
> token = entry->token;
>
> @@ -1215,6 +1329,11 @@ static int sev_local_send(struct kvm *kvm, struct kvm_sev_cmd *argp)
> hash_del(&entry->hnode);
> spin_unlock(&sev_info_migration_hash_lock);
>
> +e_vmsadel:
> + if (sev_es_guest(kvm) && process_vmsa_list(kvm, &entry->vmsa_list))
> + WARN(1,
> + "Unable to move VMSA list back to source VM. Guest is in a broken state now.");
> +
> e_listdel:
> list_replace_init(&entry->regions_list, &sev->regions_list);
>
> @@ -1233,9 +1352,6 @@ static int sev_local_receive(struct kvm *kvm, struct kvm_sev_cmd *argp)
> if (!sev_guest(kvm))
> return -ENOTTY;
>
> - if (sev->es_active)
> - return -EPERM;
> -
> if (sev->handle != 0)
> return -EPERM;
>
> @@ -1254,6 +1370,14 @@ static int sev_local_receive(struct kvm *kvm, struct kvm_sev_cmd *argp)
>
> memcpy(&old_info, sev, sizeof(old_info));
>
> + if (entry->es_enabled) {
> + if (process_vmsa_list(kvm, &entry->vmsa_list))
> + goto err_unlock;
> +
> + sev->es_active = true;
> + sev->ap_jump_table = entry->ap_jump_table;
> + }
> +
> /*
> * The source VM always frees @entry On the target we simply
> * mark the token as invalid to notify the source the sev info
> @@ -2046,12 +2170,22 @@ void sev_vm_destroy(struct kvm *kvm)
> __unregister_region_list_locked(kvm, &sev->regions_list);
> }
>
> - /*
> - * If userspace was terminated before unregistering the memory
> - * regions then lets unpin all the registered memory.
> - */
> - if (entry)
> + if (entry) {
> + /*
> + * If there are any saved VMSAs, restore them so they can be
> + * destructed through the normal path.
> + */
> + if (entry->es_enabled)
> + if (process_vmsa_list(kvm, &entry->vmsa_list))
> + WARN(1,
> + "Unable to clean up vmsa_list");
> +
> + /*
> + * If userspace was terminated before unregistering the memory
> + * regions then lets unpin all the registered memory.
> + */
> __unregister_region_list_locked(kvm, &entry->regions_list);
> + }
>
> mutex_unlock(&kvm->lock);
>
> @@ -2243,9 +2377,11 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu)
>
> svm = to_svm(vcpu);
>
> - if (vcpu->arch.guest_state_protected)
> + if (svm->ghcb && vcpu->arch.guest_state_protected)
> sev_flush_guest_memory(svm, svm->vmsa, PAGE_SIZE);
> - __free_page(virt_to_page(svm->vmsa));
> +
> + if (svm->vmsa)
> + __free_page(virt_to_page(svm->vmsa));
>
> if (svm->ghcb_sa_free)
> kfree(svm->ghcb_sa);
> --
> 2.32.0.288.g62a8d224e6-goog
>
Reviewed-by: Marc Orr <marcorr@xxxxxxxxxx>