[PATCH v14 092/113] KVM: TDX: Handle TDX PV HLT hypercall

From: isaku . yamahata
Date: Mon May 29 2023 - 00:33:19 EST


From: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>

Wire up TDX PV HLT hypercall to the KVM backend function.

Signed-off-by: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>
---
arch/x86/kvm/vmx/tdx.c | 42 +++++++++++++++++++++++++++++++++++++++++-
arch/x86/kvm/vmx/tdx.h | 3 +++
2 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index f8ff12e462f5..304ad5e20697 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -634,7 +634,32 @@ void tdx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)

bool tdx_protected_apic_has_interrupt(struct kvm_vcpu *vcpu)
{
- return pi_has_pending_interrupt(vcpu);
+ bool ret = pi_has_pending_interrupt(vcpu);
+ struct vcpu_tdx *tdx = to_tdx(vcpu);
+
+ if (ret || vcpu->arch.mp_state != KVM_MP_STATE_HALTED)
+ return true;
+
+ if (tdx->interrupt_disabled_hlt)
+ return false;
+
+ /*
+ * This is for the case where the virtual interrupt is recognized,
+ * i.e. set in vmcs.RVI, between the STI and "HLT". KVM doesn't have
+ * access to RVI and the interrupt is no longer in the PID (because it
+ * was "recognized"). It doesn't get delivered in the guest because the
+ * TDCALL completes before interrupts are enabled.
+ *
+ * The TDX module sets RVI while in an STI interrupt shadow.
+ * - TDExit (typically TDG.VP.VMCALL<HLT>) from the guest to the TDX
+ * module. The interrupt shadow at this point is gone.
+ * - It knows that there is an interrupt that can be delivered
+ * (RVI > PPR && EFLAGS.IF=1, the other conditions of 29.2.2 don't
+ * matter).
+ * - It forwards the TDExit nevertheless, to a clueless hypervisor that
+ * has no way to glean either RVI or PPR.
+ */
+ return !!xchg(&tdx->buggy_hlt_workaround, 0);
}

void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
@@ -1002,6 +1027,17 @@ static int tdx_emulate_cpuid(struct kvm_vcpu *vcpu)
return 1;
}

+static int tdx_emulate_hlt(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_tdx *tdx = to_tdx(vcpu);
+
+ /* Read by tdx_protected_apic_has_interrupt() to avoid a heavy SEAMCALL. */
+ tdx->interrupt_disabled_hlt = tdvmcall_a0_read(vcpu);
+
+ tdvmcall_set_return_code(vcpu, TDG_VP_VMCALL_SUCCESS);
+ return kvm_emulate_halt_noskip(vcpu);
+}
+
static int handle_tdvmcall(struct kvm_vcpu *vcpu)
{
if (tdvmcall_exit_type(vcpu))
@@ -1010,6 +1046,8 @@ static int handle_tdvmcall(struct kvm_vcpu *vcpu)
switch (tdvmcall_leaf(vcpu)) {
case EXIT_REASON_CPUID:
return tdx_emulate_cpuid(vcpu);
+ case EXIT_REASON_HLT:
+ return tdx_emulate_hlt(vcpu);
default:
break;
}
@@ -1348,6 +1386,8 @@ void tdx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
struct kvm_vcpu *vcpu = apic->vcpu;
struct vcpu_tdx *tdx = to_tdx(vcpu);

+ /* See comment in tdx_protected_apic_has_interrupt(). */
+ tdx->buggy_hlt_workaround = 1;
/* TDX supports only posted interrupt. No lapic emulation. */
__vmx_deliver_posted_interrupt(vcpu, &tdx->pi_desc, vector);
}
diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h
index 16f8e978633a..ff35cd8409d9 100644
--- a/arch/x86/kvm/vmx/tdx.h
+++ b/arch/x86/kvm/vmx/tdx.h
@@ -101,6 +101,9 @@ struct vcpu_tdx {
bool host_state_need_restore;
u64 msr_host_kernel_gs_base;

+ bool interrupt_disabled_hlt;
+ unsigned int buggy_hlt_workaround;
+
/*
* Dummy to make pmu_intel not corrupt memory.
* TODO: Support PMU for TDX. Future work.
--
2.25.1