Re: [PATCH v13 003/113] KVM: TDX: Initialize the TDX module when loading the KVM intel kernel module

From: Huang, Kai
Date: Fri Mar 24 2023 - 06:42:10 EST


Sorry for late reply.

>
> +static bool enable_tdx __ro_after_init;
> +module_param_named(tdx, enable_tdx, bool, 0444);
> +
> +static int vt_hardware_enable(void)
> +{
> + int ret;
> +
> + ret = vmx_hardware_enable();
> + if (ret || !enable_tdx)
> + return ret;
> +
> + ret = tdx_cpu_enable();
> + if (ret)
> + vmx_hardware_disable();
> + return ret;
> +}
> +
> +static __init int vt_hardware_setup(void)
> +{
> + int ret;
> +
> + ret = vmx_hardware_setup();
> + if (ret)
> + return ret;
> +
> + enable_tdx = enable_tdx && !tdx_hardware_setup(&vt_x86_ops);
> +
> + return 0;
> +}
> +
> #define VMX_REQUIRED_APICV_INHIBITS \
> ( \
> BIT(APICV_INHIBIT_REASON_DISABLE)| \
> @@ -24,7 +54,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
>
> .hardware_unsetup = vmx_hardware_unsetup,
>
> - .hardware_enable = vmx_hardware_enable,
> + .hardware_enable = vt_hardware_enable,
> .hardware_disable = vmx_hardware_disable,
> .has_emulated_msr = vmx_has_emulated_msr,
>
> @@ -159,7 +189,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
> };
>
> struct kvm_x86_init_ops vt_init_ops __initdata = {
> - .hardware_setup = vmx_hardware_setup,
> + .hardware_setup = vt_hardware_setup,
> .handle_intel_pt_intr = NULL,
>
> .runtime_ops = &vt_x86_ops,
> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> new file mode 100644
> index 000000000000..f13fdf71430b
> --- /dev/null
> +++ b/arch/x86/kvm/vmx/tdx.c
> @@ -0,0 +1,46 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <linux/cpu.h>
> +
> +#include <asm/tdx.h>
> +
> +#include "capabilities.h"
> +#include "x86_ops.h"
> +#include "x86.h"
> +
> +#undef pr_fmt
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> +
> +static int __init tdx_module_setup(void)
> +{
> + int ret;
> +
> + ret = tdx_enable();
> + if (ret) {
> + pr_info("Failed to initialize TDX module.\n");
> + return ret;
> + }
> +
> + pr_info("TDX is supported.\n");

Both pr_info()s are not required, because tdx_enable() internally prints them.

> + return 0;
> +}
> +
> +int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
> +{
> + int r = 0;
> +
> + if (!enable_ept) {
> + pr_warn("Cannot enable TDX with EPT disabled\n");
> + return -EINVAL;
> + }
> +
> + /* tdx_enable() in tdx_module_setup() requires cpus lock. */
> + cpus_read_lock();
> + r = kvm_hardware_enable_all();
> + if (!r) {
> + r = tdx_module_setup();
> + kvm_hardware_disable_all();
> + }
> + cpus_read_unlock();
> +
> + return r;
> +}
>

[...]


> #endif /* __KVM_X86_VMX_X86_OPS_H */
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 2125fcaa3973..b264012a8478 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -9435,6 +9435,16 @@ static int __kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
>
> kvm_init_pmu_capability(ops->pmu_ops);
>
> + /*
> + * TDX requires those methods to enable VMXON by
> + * kvm_hardware_enable/disable_all()
> + */
> + static_call_update(kvm_x86_check_processor_compatibility,
> + ops->runtime_ops->check_processor_compatibility);
> + static_call_update(kvm_x86_hardware_enable,
> + ops->runtime_ops->hardware_enable);
> + static_call_update(kvm_x86_hardware_disable,
> + ops->runtime_ops->hardware_disable);
> r = ops->hardware_setup();
> if (r != 0)
> goto out_mmu_exit;

Hmm.. I think this is ugly. Perhaps we should never do any
static_call(kvm_x86_xxx)() in hardware_setup(), because hardware_setup() is
called before kvm_ops_update() and may update vendor's kvm_x86_ops.

So probably use hardware_enable_all() in hardware_setup() is a bad idea.

I think we have below options on how to handle:

1) Use VMX's kvm_x86_ops directly in tdx_hardware_setup(). For instance,
something like below:

int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
{
...

cpus_read_lock();
r = on_each_cpu(vt_x86_ops.hardware_enable, ...);
if (!r)
r = tdx_module_setup();
on_each_cpu(vt_x86_ops.hardware_disable, ...);
cpus_read_unlock();

...
}

But this doesn't clean up nicely when there's some particular cpus fail to do
hardware_enable(). To clean up nicely, we do need additional things similar to
the hardware_enable_all() code path: a per-cpu variable or a cpumask_t + a
wrapper of vt_x86_ops->hardware_enable() to track which cpus have done
hardware_enable() successfully.

2) Move those static_call_update() into tdx_hardware_setup() so they are TDX
code self-contained. But this would require exposing kvm_x86_ops as symbol,
which isn't nice either.

3) Introduce another kvm_x86_init_ops->hardware_post_setup(), which is called
after kvm_ops_update().

Personally, I think 3) perhaps is the most elegant one, but not sure whether
Sean/Paolo has any opinion.