Re: [PATCH 20/30] KVM: TDX: create/destroy VM structure

From: Yan Zhao
Date: Mon Feb 24 2025 - 03:35:05 EST


On Fri, Feb 21, 2025 at 05:38:36PM -0800, Sean Christopherson wrote:
> On Sat, Feb 22, 2025, Rick P Edgecombe wrote:
> > On Thu, 2025-02-20 at 17:08 -0800, Sean Christopherson wrote:
> > > Argh, after digging more, this isn't actually true.  The separate "unload MMUs"
> > > code is leftover from when KVM rather stupidly tried to free all MMU pages when
> > > freeing a vCPU.
> > >
> > > Commit 7b53aa565084 ("KVM: Fix vcpu freeing for guest smp") "fixed" things by
> > > unloading MMUs before destroying vCPUs, but the underlying problem was trying to
> > > free _all_ MMU pages when destroying a single vCPU.  That eventually got fixed
> > > for good (haven't checked when), but the separate MMU unloading never got cleaned
> > > up.
> > >
> > > So, scratch the mmu_destroy() idea.  But I still want/need to move vCPU destruction
> > > before vm_destroy.
> > >
> > > Now that kvm_arch_pre_destroy_vm() is a thing, one idea would be to add
> > > kvm_x86_ops.pre_destroy_vm(), which would pair decently well with the existing
> > > call to kvm_mmu_pre_destroy_vm().
> >
> > So:
> > kvm_x86_call(vm_destroy)(kvm); -> tdx_mmu_release_hkid()
> > kvm_destroy_vcpus(kvm); -> tdx_vcpu_free() -> reclaim
> > static_call_cond(kvm_x86_vm_free)(kvm); -> reclaim
> >
> > To:
> > (pre_destroy_vm)() -> tdx_mmu_release_hkid()
> > kvm_destroy_vcpus(kvm); -> reclaim
> > kvm_x86_call(vm_destroy)(kvm); -> nothing
> > static_call_cond(kvm_x86_vm_free)(kvm); -> reclaim
>
> I was thinking this last one could go away, and TDX could reclaim at vm_destroy?
> Or is that not possible because it absolutely must come dead last?
Hmm, the change below works on my end.

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 50cf27473ffb..79406bf07a1c 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -21,7 +21,7 @@ KVM_X86_OP(has_emulated_msr)
KVM_X86_OP(vcpu_after_set_cpuid)
KVM_X86_OP(vm_init)
KVM_X86_OP_OPTIONAL(vm_destroy)
-KVM_X86_OP_OPTIONAL(vm_free)
+KVM_X86_OP_OPTIONAL(vm_pre_destroy)
KVM_X86_OP_OPTIONAL_RET0(vcpu_precreate)
KVM_X86_OP(vcpu_create)
KVM_X86_OP(vcpu_free)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8d15e604613b..2d5b6d34d30e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1677,7 +1677,7 @@ struct kvm_x86_ops {
unsigned int vm_size;
int (*vm_init)(struct kvm *kvm);
void (*vm_destroy)(struct kvm *kvm);
- void (*vm_free)(struct kvm *kvm);
+ void (*vm_pre_destroy)(struct kvm *kvm);

/* Create, but do not attach this VCPU */
int (*vcpu_precreate)(struct kvm *kvm);
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index 1fa0364faa60..a8acf98dfadd 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -76,18 +76,19 @@ static int vt_vm_init(struct kvm *kvm)
return vmx_vm_init(kvm);
}

-static void vt_vm_destroy(struct kvm *kvm)
+static void vt_vm_pre_destroy(struct kvm *kvm)
{
if (is_td(kvm))
return tdx_mmu_release_hkid(kvm);

- vmx_vm_destroy(kvm);
}

-static void vt_vm_free(struct kvm *kvm)
+static void vt_vm_destroy(struct kvm *kvm)
{
if (is_td(kvm))
- tdx_vm_free(kvm);
+ return tdx_vm_free(kvm);
+
+ vmx_vm_destroy(kvm);
}

static int vt_vcpu_precreate(struct kvm *kvm)
@@ -914,7 +915,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {

.vm_init = vt_vm_init,
.vm_destroy = vt_vm_destroy,
- .vm_free = vt_vm_free,
+ .vm_pre_destroy = vt_vm_pre_destroy,

.vcpu_precreate = vt_vcpu_precreate,
.vcpu_create = vt_vcpu_create,

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8ae96449e6e2..cb2672a59715 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -12889,6 +12889,7 @@ EXPORT_SYMBOL_GPL(__x86_set_memory_region);
void kvm_arch_pre_destroy_vm(struct kvm *kvm)
{
kvm_mmu_pre_destroy_vm(kvm);
+ static_call_cond(kvm_x86_vm_pre_destroy)(kvm);
}

void kvm_arch_destroy_vm(struct kvm *kvm)
@@ -12908,7 +12909,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
mutex_unlock(&kvm->slots_lock);
}
kvm_unload_vcpu_mmus(kvm);
- kvm_x86_call(vm_destroy)(kvm);
kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1));
kvm_pic_destroy(kvm);
kvm_ioapic_destroy(kvm);
@@ -12919,7 +12919,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_page_track_cleanup(kvm);
kvm_xen_destroy_vm(kvm);
kvm_hv_destroy_vm(kvm);
- static_call_cond(kvm_x86_vm_free)(kvm);
+ kvm_x86_call(vm_destroy)(kvm);
}

static void memslot_rmap_free(struct kvm_memory_slot *slot)