[PATCH v11 14/19] KVM: s390: pv: cleanup leftover protected VMs if needed
From: Claudio Imbrenda
Date: Fri Jun 03 2022 - 02:58:25 EST
In upcoming patches it will be possible to start tearing down a
protected VM, and finish the teardown concurrently in a different
thread.
Protected VMs that are pending for tear down ("leftover") need to be
cleaned properly when the userspace process (e.g. qemu) terminates.
This patch makes sure that all "leftover" protected VMs are always
properly torn down.
Signed-off-by: Claudio Imbrenda <imbrenda@xxxxxxxxxxxxx>
---
arch/s390/include/asm/kvm_host.h | 2 +
arch/s390/kvm/kvm-s390.c | 2 +
arch/s390/kvm/pv.c | 109 ++++++++++++++++++++++++++++---
3 files changed, 104 insertions(+), 9 deletions(-)
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 5824efe5fc9d..cca8e05e0a71 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -924,6 +924,8 @@ struct kvm_s390_pv {
u64 guest_len;
unsigned long stor_base;
void *stor_var;
+ void *prepared_for_async_deinit;
+ struct list_head need_cleanup;
struct mmu_notifier mmu_notifier;
};
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index fe1fa896def7..369de8377116 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2890,6 +2890,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm_s390_vsie_init(kvm);
if (use_gisa)
kvm_s390_gisa_init(kvm);
+ INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup);
+ kvm->arch.pv.prepared_for_async_deinit = NULL;
KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
return 0;
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index 6cffea26c47f..8471c17d538c 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -17,6 +17,19 @@
#include <linux/mmu_notifier.h>
#include "kvm-s390.h"
+/**
+ * @struct leftover_pv_vm
+ * Represents a "leftover" protected VM that is still registered with the
+ * Ultravisor, but which does not correspond any longer to an active KVM VM.
+ */
+struct leftover_pv_vm {
+ struct list_head list;
+ unsigned long old_gmap_table;
+ u64 handle;
+ void *stor_var;
+ unsigned long stor_base;
+};
+
static void kvm_s390_clear_pv_state(struct kvm *kvm)
{
kvm->arch.pv.handle = 0;
@@ -158,23 +171,88 @@ static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
return -ENOMEM;
}
+/**
+ * kvm_s390_pv_dispose_one_leftover - Clean up one leftover protected VM.
+ * @kvm the KVM that was associated with this leftover protected VM
+ * @leftover details about the leftover protected VM that needs a clean up
+ * @rc the RC code of the Destroy Secure Configuration UVC
+ * @rrc the RRC code of the Destroy Secure Configuration UVC
+ * Return: 0 in case of success, otherwise 1
+ *
+ * Destroy one leftover protected VM.
+ * On success, kvm->mm->context.protected_count will be decremented atomically
+ * and all other resources used by the VM will be freed.
+ */
+static int kvm_s390_pv_dispose_one_leftover(struct kvm *kvm, struct leftover_pv_vm *leftover,
+ u16 *rc, u16 *rrc)
+{
+ int cc;
+
+ cc = uv_cmd_nodata(leftover->handle, UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY LEFTOVER VM: rc %x rrc %x", *rc, *rrc);
+ WARN_ONCE(cc, "protvirt destroy leftover vm failed rc %x rrc %x", *rc, *rrc);
+ if (cc)
+ return cc;
+ /*
+ * Intentionally leak unusable memory. If the UVC fails, the memory
+ * used for the VM and its metadata is permanently unusable.
+ * This can only happen in case of a serious KVM or hardware bug; it
+ * is not expected to happen in normal operation.
+ */
+ free_pages(leftover->stor_base, get_order(uv_info.guest_base_stor_len));
+ free_pages(leftover->old_gmap_table, CRST_ALLOC_ORDER);
+ vfree(leftover->stor_var);
+ atomic_dec(&kvm->mm->context.protected_count);
+ return 0;
+}
+
+/**
+ * kvm_s390_pv_cleanup_leftovers - Clean up all leftover protected VMs.
+ * @kvm the KVM whose leftover protected VMs are to be cleaned up
+ * @rc the RC code of the first failing UVC, unless it was already != 1
+ * @rrc the RRC code of the first failing UVC, unless @rc was already != 1
+ * Return: 0 if all leftover VMs are successfully cleaned up, otherwise 1
+ *
+ * This function will clean up all "leftover" protected VMs, including the
+ * one that had been set aside for deferred teardown.
+ */
+static int kvm_s390_pv_cleanup_leftovers(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+ struct leftover_pv_vm *cur;
+ u16 _rc, _rrc;
+ int cc = 0;
+
+ if (kvm->arch.pv.prepared_for_async_deinit)
+ list_add(kvm->arch.pv.prepared_for_async_deinit, &kvm->arch.pv.need_cleanup);
+
+ while (!list_empty(&kvm->arch.pv.need_cleanup)) {
+ cur = list_first_entry(&kvm->arch.pv.need_cleanup, typeof(*cur), list);
+ if (kvm_s390_pv_dispose_one_leftover(kvm, cur, &_rc, &_rrc)) {
+ cc = 1;
+ /* do not overwrite a previous error code */
+ if (*rc == 1) {
+ *rc = _rc;
+ *rrc = _rrc;
+ }
+ }
+ list_del(&cur->list);
+ kfree(cur);
+ }
+ kvm->arch.pv.prepared_for_async_deinit = NULL;
+ return cc;
+}
+
/* this should not fail, but if it does, we must not free the donated memory */
int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
int cc;
+ /* Make sure the counter does not reach 0 before calling s390_uv_destroy_range */
+ atomic_inc(&kvm->mm->context.protected_count);
+
cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
- /*
- * if the mm still has a mapping, make all its pages accessible
- * before destroying the guest
- */
- if (mmget_not_zero(kvm->mm)) {
- s390_uv_destroy_range(kvm->mm, 0, TASK_SIZE);
- mmput(kvm->mm);
- }
-
if (!cc) {
atomic_dec(&kvm->mm->context.protected_count);
kvm_s390_pv_dealloc_vm(kvm);
@@ -185,6 +263,19 @@ int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x", *rc, *rrc);
WARN_ONCE(cc, "protvirt destroy vm failed rc %x rrc %x", *rc, *rrc);
+ cc |= kvm_s390_pv_cleanup_leftovers(kvm, rc, rrc);
+
+ /*
+ * If the mm still has a mapping, try to mark all its pages as
+ * accessible. The counter should not reach zero before this
+ * cleanup has been performed.
+ */
+ if (mmget_not_zero(kvm->mm)) {
+ s390_uv_destroy_range(kvm->mm, 0, TASK_SIZE);
+ mmput(kvm->mm);
+ }
+ /* Now the counter can safely reach 0 */
+ atomic_dec(&kvm->mm->context.protected_count);
return cc ? -EIO : 0;
}
--
2.36.1