Re: [PATCH 3/3] KVM, SEV: Add support for SEV-ES local migration

From: Sean Christopherson
Date: Tue Jul 13 2021 - 18:22:01 EST


On Mon, Jun 21, 2021, Peter Gonda wrote:
> +static int process_vmsa_list(struct kvm *kvm, struct list_head *vmsa_list)
> +{
> + struct vmsa_node *vmsa_node, *q;
> + struct kvm_vcpu *vcpu;
> + struct vcpu_svm *svm;
> +
> + lockdep_assert_held(&kvm->lock);
> +
> + if (!vmsa_list)

This is pointless; all callers pass in a list, i.e. it's mandatory.

> + return 0;
> +
> + list_for_each_entry(vmsa_node, vmsa_list, list) {
> + if (!kvm_get_vcpu_by_id(kvm, vmsa_node->vcpu_id)) {
> + WARN(1,
> + "Failed to find VCPU with ID %d despite presence in VMSA list.\n",
> + vmsa_node->vcpu_id);
> + return -1;
> + }
> + }
> +
> + /*
> + * Move any stashed VMSAs back to their respective VMCBs and delete
> + * those nodes.
> + */
> + list_for_each_entry_safe(vmsa_node, q, vmsa_list, list) {
> + vcpu = kvm_get_vcpu_by_id(kvm, vmsa_node->vcpu_id);

Barring a KVM bug, is it even theoretically possible for vcpu to be NULL? If not,
I'd simply drop the above sanity check. If this can only be true if there's a
KVM bug and you really want to keep the WARN, just do:

if (WARN_ON(!vcpu))
continue;

since a KVM bug this egregious means all bets are off anyways. That should also
allow you to make this a void-returning helper and avoid pointless checking of
its return value in the callers.

> + svm = to_svm(vcpu);
> + svm->vmsa = vmsa_node->vmsa;
> + svm->ghcb = vmsa_node->ghcb;
> + svm->vmcb->control.ghcb_gpa = vmsa_node->ghcb_gpa;
> + svm->vcpu.arch.guest_state_protected = true;
> + svm->vmcb->control.vmsa_pa = __pa(svm->vmsa);
> + svm->ghcb_sa = vmsa_node->ghcb_sa;
> + svm->ghcb_sa_len = vmsa_node->ghcb_sa_len;
> + svm->ghcb_sa_sync = vmsa_node->ghcb_sa_sync;
> + svm->ghcb_sa_free = vmsa_node->ghcb_sa_free;
> +
> + list_del(&vmsa_node->list);
> + kfree(vmsa_node);
> + }
> +
> + return 0;
> +}
> +
> +static int create_vmsa_list(struct kvm *kvm,
> + struct sev_info_migration_node *entry)
> +{
> + int i;
> + const int num_vcpus = atomic_read(&kvm->online_vcpus);
> + struct vmsa_node *node;
> + struct kvm_vcpu *vcpu;
> + struct vcpu_svm *svm;
> +
> + INIT_LIST_HEAD(&entry->vmsa_list);
> + for (i = 0; i < num_vcpus; ++i) {
> + node = kzalloc(sizeof(*node), GFP_KERNEL);
> + if (!node)
> + goto e_freelist;
> +
> + vcpu = kvm->vcpus[i];
> + node->vcpu_id = vcpu->vcpu_id;
> +
> + svm = to_svm(vcpu);
> + node->vmsa = svm->vmsa;
> + svm->vmsa = NULL;
> + node->ghcb = svm->ghcb;
> + svm->ghcb = NULL;
> + node->ghcb_gpa = svm->vmcb->control.ghcb_gpa;
> + node->ghcb_sa = svm->ghcb_sa;
> + svm->ghcb_sa = NULL;
> + node->ghcb_sa_len = svm->ghcb_sa_len;
> + svm->ghcb_sa_len = 0;
> + node->ghcb_sa_sync = svm->ghcb_sa_sync;
> + svm->ghcb_sa_sync = false;
> + node->ghcb_sa_free = svm->ghcb_sa_free;
> + svm->ghcb_sa_free = false;
> +
> + list_add_tail(&node->list, &entry->vmsa_list);
> + }
> +
> + return 0;
> +
> +e_freelist:
> + if (process_vmsa_list(kvm, &entry->vmsa_list))
> + WARN(1, "Unable to move VMSA list back to source VM. Guest is in a broken state now.");

Same comments about err_freelist and using WARN_ON(). Though if process_vmsa_list()
can't return an error, this goes away entirely.

> + return -1;
> +}
> +
> static int sev_local_send(struct kvm *kvm, struct kvm_sev_cmd *argp)
> {
> struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> @@ -1174,9 +1280,6 @@ static int sev_local_send(struct kvm *kvm, struct kvm_sev_cmd *argp)
> if (!sev_guest(kvm))
> return -ENOTTY;
>
> - if (sev->es_active)
> - return -EPERM;
> -
> if (sev->info_token != 0)
> return -EEXIST;
>
> @@ -1196,8 +1299,19 @@ static int sev_local_send(struct kvm *kvm, struct kvm_sev_cmd *argp)
> INIT_LIST_HEAD(&entry->regions_list);
> list_replace_init(&sev->regions_list, &entry->regions_list);
>
> + if (sev_es_guest(kvm)) {
> + /*
> + * If this is an ES guest, we need to move each VMCB's VMSA into a
> + * list for migration.
> + */
> + entry->es_enabled = true;
> + entry->ap_jump_table = sev->ap_jump_table;
> + if (create_vmsa_list(kvm, entry))
> + goto e_listdel;
> + }
> +
> if (place_migration_node(entry))
> - goto e_listdel;
> + goto e_vmsadel;
>
> token = entry->token;
>
> @@ -1215,6 +1329,11 @@ static int sev_local_send(struct kvm *kvm, struct kvm_sev_cmd *argp)
> hash_del(&entry->hnode);
> spin_unlock(&sev_info_migration_hash_lock);
>
> +e_vmsadel:
> + if (sev_es_guest(kvm) && process_vmsa_list(kvm, &entry->vmsa_list))
> + WARN(1,
> + "Unable to move VMSA list back to source VM. Guest is in a broken state now.");

Guess what today's Final Jeopardy answer is? :-D

> +
> e_listdel:
> list_replace_init(&entry->regions_list, &sev->regions_list);
>
> @@ -1233,9 +1352,6 @@ static int sev_local_receive(struct kvm *kvm, struct kvm_sev_cmd *argp)
> if (!sev_guest(kvm))
> return -ENOTTY;
>
> - if (sev->es_active)
> - return -EPERM;
> -
> if (sev->handle != 0)
> return -EPERM;
>
> @@ -1254,6 +1370,14 @@ static int sev_local_receive(struct kvm *kvm, struct kvm_sev_cmd *argp)
>
> memcpy(&old_info, sev, sizeof(old_info));
>
> + if (entry->es_enabled) {
> + if (process_vmsa_list(kvm, &entry->vmsa_list))
> + goto err_unlock;
> +
> + sev->es_active = true;
> + sev->ap_jump_table = entry->ap_jump_table;
> + }
> +
> /*
> * The source VM always frees @entry On the target we simply
> * mark the token as invalid to notify the source the sev info
> @@ -2046,12 +2170,22 @@ void sev_vm_destroy(struct kvm *kvm)
> __unregister_region_list_locked(kvm, &sev->regions_list);
> }
>
> - /*
> - * If userspace was terminated before unregistering the memory
> - * regions then lets unpin all the registered memory.
> - */
> - if (entry)
> + if (entry) {
> + /*
> + * If there are any saved VMSAs, restore them so they can be
> + * destructed through the normal path.
> + */
> + if (entry->es_enabled)
> + if (process_vmsa_list(kvm, &entry->vmsa_list))
> + WARN(1,
> + "Unable to clean up vmsa_list");

More code that can be zapped if process_vmsa_list() is less of a zealot.

> +
> + /*
> + * If userspace was terminated before unregistering the memory
> + * regions then lets unpin all the registered memory.
> + */
> __unregister_region_list_locked(kvm, &entry->regions_list);
> + }
>
> mutex_unlock(&kvm->lock);
>
> @@ -2243,9 +2377,11 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu)
>
> svm = to_svm(vcpu);
>
> - if (vcpu->arch.guest_state_protected)
> + if (svm->ghcb && vcpu->arch.guest_state_protected)
> sev_flush_guest_memory(svm, svm->vmsa, PAGE_SIZE);
> - __free_page(virt_to_page(svm->vmsa));
> +
> + if (svm->vmsa)
> + __free_page(virt_to_page(svm->vmsa));
>
> if (svm->ghcb_sa_free)
> kfree(svm->ghcb_sa);
> --
> 2.32.0.288.g62a8d224e6-goog
>