Re: [PATCH v19 041/130] KVM: TDX: Refuse to unplug the last cpu on the package
From: Chao Gao
Date: Wed Mar 20 2024 - 21:07:19 EST
>diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
>index 437c6d5e802e..d69dd474775b 100644
>--- a/arch/x86/kvm/vmx/main.c
>+++ b/arch/x86/kvm/vmx/main.c
>@@ -110,6 +110,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
> .check_processor_compatibility = vmx_check_processor_compat,
>
> .hardware_unsetup = vt_hardware_unsetup,
>+ .offline_cpu = tdx_offline_cpu,
>
> /* TDX cpu enablement is done by tdx_hardware_setup(). */
> .hardware_enable = vmx_hardware_enable,
>diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
>index b11f105db3cd..f2ee5abac14e 100644
>--- a/arch/x86/kvm/vmx/tdx.c
>+++ b/arch/x86/kvm/vmx/tdx.c
>@@ -97,6 +97,7 @@ int tdx_vm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
> */
> static DEFINE_MUTEX(tdx_lock);
> static struct mutex *tdx_mng_key_config_lock;
>+static atomic_t nr_configured_hkid;
>
> static __always_inline hpa_t set_hkid_to_hpa(hpa_t pa, u16 hkid)
> {
>@@ -112,6 +113,7 @@ static inline void tdx_hkid_free(struct kvm_tdx *kvm_tdx)
> {
> tdx_guest_keyid_free(kvm_tdx->hkid);
> kvm_tdx->hkid = -1;
>+ atomic_dec(&nr_configured_hkid);
I think it might be better to extend the IDA infrastructure, e.g., add an API to
check whether any ID is allocated in a given range. No strong opinion on this.
> }
>
> static inline bool is_hkid_assigned(struct kvm_tdx *kvm_tdx)
>@@ -586,6 +588,7 @@ static int __tdx_td_init(struct kvm *kvm, struct td_params *td_params,
> if (ret < 0)
> return ret;
> kvm_tdx->hkid = ret;
>+ atomic_inc(&nr_configured_hkid);
>
> va = __get_free_page(GFP_KERNEL_ACCOUNT);
> if (!va)
>@@ -1071,3 +1074,41 @@ void tdx_hardware_unsetup(void)
> kfree(tdx_info);
> kfree(tdx_mng_key_config_lock);
> }
>+
>+int tdx_offline_cpu(void)
>+{
>+ int curr_cpu = smp_processor_id();
>+ cpumask_var_t packages;
>+ int ret = 0;
>+ int i;
>+
>+ /* No TD is running. Allow any cpu to be offline. */
>+ if (!atomic_read(&nr_configured_hkid))
>+ return 0;
>+
>+ /*
>+ * In order to reclaim TDX HKID, (i.e. when deleting guest TD), need to
>+ * call TDH.PHYMEM.PAGE.WBINVD on all packages to program all memory
>+ * controller with pconfig. If we have active TDX HKID, refuse to
>+ * offline the last online cpu.
>+ */
>+ if (!zalloc_cpumask_var(&packages, GFP_KERNEL))
>+ return -ENOMEM;
>+ for_each_online_cpu(i) {
>+ if (i != curr_cpu)
>+ cpumask_set_cpu(topology_physical_package_id(i), packages);
>+ }
Just check whether any other CPU is in the same package as the one about to go
offline. This would obviate the need for the cpumask and allow us to break out
of the loop as soon as one CPU in the same package is found.
>+ /* Check if this cpu is the last online cpu of this package. */
>+ if (!cpumask_test_cpu(topology_physical_package_id(curr_cpu), packages))
>+ ret = -EBUSY;
>+ free_cpumask_var(packages);
>+ if (ret)
>+ /*
>+ * Because it's hard for human operator to understand the
>+ * reason, warn it.
>+ */
>+#define MSG_ALLPKG_ONLINE \
>+ "TDX requires all packages to have an online CPU. Delete all TDs in order to offline all CPUs of a package.\n"
>+ pr_warn_ratelimited(MSG_ALLPKG_ONLINE);
>+ return ret;
>+}