[PATCH] Make TDX look more like VMX

From: Sean Christopherson
Date: Thu Dec 12 2024 - 13:22:25 EST


---
arch/x86/kvm/vmx/common.h | 64 +++++-
arch/x86/kvm/vmx/nested.c | 7 +-
arch/x86/kvm/vmx/posted_intr.h | 11 -
arch/x86/kvm/vmx/tdx.c | 375 +++++++++++----------------------
arch/x86/kvm/vmx/tdx.h | 11 +-
arch/x86/kvm/vmx/vmx.c | 26 +--
arch/x86/kvm/vmx/vmx.h | 42 +---
7 files changed, 201 insertions(+), 335 deletions(-)

diff --git a/arch/x86/kvm/vmx/common.h b/arch/x86/kvm/vmx/common.h
index 809ced4c6cd8..f1679e53cb4b 100644
--- a/arch/x86/kvm/vmx/common.h
+++ b/arch/x86/kvm/vmx/common.h
@@ -12,6 +12,61 @@
#include "vmcs.h"
#include "x86.h"

+struct vcpu_vt {
+ /* Posted interrupt descriptor */
+ struct pi_desc pi_desc;
+
+ /* Used if this vCPU is waiting for PI notification wakeup. */
+ struct list_head pi_wakeup_list;
+
+ union vmx_exit_reason exit_reason;
+
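+ /*
+ * Exit info cached on VM-Exit: filled from the VMCS for VMX, and from
+ * the TDH.VP.ENTER output registers for TDX.
+ */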
+ unsigned long exit_qualification;
+ u64 ext_qualification;
+ gpa_t exit_gpa;
+ u32 exit_intr_info;
+ u32 idt_vectoring_info;
+
+ /*
+ * If true, guest state has been loaded into hardware, and host state
+ * saved into vcpu_{vt,vmx,tdx}. If false, host state is loaded into
+ * hardware.
+ */
+ bool guest_state_loaded;
+
+#ifdef CONFIG_X86_64
+ u64 msr_host_kernel_gs_base;
+#endif
+};
+
+static __always_inline union vmx_exit_reason vmx_get_exit_reason(struct kvm_vcpu *vcpu)
+{
+ return to_vt(vcpu)->exit_reason;
+}
+
+static __always_inline unsigned long vmx_get_exit_qual(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vt *vt = to_vt(vcpu);
+
+ if (!kvm_register_test_and_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_1) &&
+ !WARN_ON_ONCE(is_td_vcpu(vcpu)))
+ vt->exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+
+ return vt->exit_qualification;
+}
+
+static __always_inline u32 vmx_get_intr_info(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vt *vt = to_vt(vcpu);
+
+ if (!kvm_register_test_and_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_2) &&
+ !WARN_ON_ONCE(is_td_vcpu(vcpu)))
+ vt->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+
+ return vt->exit_intr_info;
+}
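+
+/*
+ * Accessors for the remaining cached exit info; assumed to live here since
+ * the TDX exit paths below reference them (they mirror the helpers above).
+ */
+static __always_inline u64 vmx_get_ext_exit_qual(struct kvm_vcpu *vcpu)
+{
+ return to_vt(vcpu)->ext_qualification;
+}
+
+static __always_inline gpa_t vmx_get_exit_gpa(struct kvm_vcpu *vcpu)
+{
+ return to_vt(vcpu)->exit_gpa;
+}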
+
extern unsigned long vmx_host_idt_base;
void vmx_do_interrupt_irqoff(unsigned long entry);
void vmx_do_nmi_irqoff(void);
@@ -36,9 +91,10 @@ static inline void vmx_handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
rdmsrl(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err);
}

-static inline void vmx_handle_exception_irqoff(struct kvm_vcpu *vcpu,
- u32 intr_info)
+static inline void vmx_handle_exception_irqoff(struct kvm_vcpu *vcpu)
{
+ u32 intr_info = vmx_get_intr_info(vcpu);
+
/* if exit due to PF check for async PF */
if (is_page_fault(intr_info))
vcpu->arch.apf.host_apf_flags = kvm_read_and_reset_apf_flags();
@@ -50,9 +106,9 @@ static inline void vmx_handle_exception_irqoff(struct kvm_vcpu *vcpu,
kvm_machine_check();
}

-static inline void vmx_handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu,
- u32 intr_info)
+static inline void vmx_handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
{
+ u32 intr_info = vmx_get_intr_info(vcpu);
unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK;

if (KVM_BUG(!is_external_intr(intr_info), vcpu->kvm,
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index aa78b6f38dfe..056b6ff1503e 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -410,6 +410,7 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct vcpu_vt *vt = to_vt(vcpu);
unsigned long exit_qualification;
u32 vm_exit_reason;

@@ -425,7 +426,7 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
* tables also changed, but KVM should not treat EPT Misconfig
* VM-Exits as writes.
*/
- WARN_ON_ONCE(vmx->exit_reason.basic != EXIT_REASON_EPT_VIOLATION);
+ WARN_ON_ONCE(vt->exit_reason.basic != EXIT_REASON_EPT_VIOLATION);

/*
* PML Full and EPT Violation VM-Exits both use bit 12 to report
@@ -6099,7 +6100,7 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu)
* nested VM-Exit. Pass the original exit reason, i.e. don't hardcode
* EXIT_REASON_VMFUNC as the exit reason.
*/
- nested_vmx_vmexit(vcpu, vmx->exit_reason.full,
+ nested_vmx_vmexit(vcpu, vmx_get_exit_reason(vcpu).full,
vmx_get_intr_info(vcpu),
vmx_get_exit_qual(vcpu));
return 1;
@@ -6544,7 +6545,7 @@ static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu,
bool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- union vmx_exit_reason exit_reason = vmx->exit_reason;
+ union vmx_exit_reason exit_reason = vmx_get_exit_reason(vcpu);
unsigned long exit_qual;
u32 exit_intr_info;

diff --git a/arch/x86/kvm/vmx/posted_intr.h b/arch/x86/kvm/vmx/posted_intr.h
index 8b1dccfe4885..9ac4f6eafac5 100644
--- a/arch/x86/kvm/vmx/posted_intr.h
+++ b/arch/x86/kvm/vmx/posted_intr.h
@@ -5,17 +5,6 @@
#include <linux/find.h>
#include <asm/posted_intr.h>

-struct vcpu_pi {
- struct kvm_vcpu vcpu;
-
- /* Posted interrupt descriptor */
- struct pi_desc pi_desc;
-
- /* Used if this vCPU is waiting for PI notification wakeup. */
- struct list_head pi_wakeup_list;
- /* Until here common layout between vcpu_vmx and vcpu_tdx. */
-};
-
struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu);

void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu);
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 69ef9c967fbf..7eff717c9d0d 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -182,49 +182,6 @@ static __always_inline hpa_t set_hkid_to_hpa(hpa_t pa, u16 hkid)
return pa | ((hpa_t)hkid << boot_cpu_data.x86_phys_bits);
}

-static __always_inline union vmx_exit_reason tdexit_exit_reason(struct kvm_vcpu *vcpu)
-{
- return (union vmx_exit_reason)(u32)(to_tdx(vcpu)->vp_enter_ret);
-}
-
-/*
- * There is no simple way to check some bit(s) to decide whether the return
- * value of TDH.VP.ENTER has a VMX exit reason or not. E.g.,
- * TDX_NON_RECOVERABLE_TD_WRONG_APIC_MODE has exit reason but with error bit
- * (bit 63) set, TDX_NON_RECOVERABLE_TD_CORRUPTED_MD has no exit reason but with
- * error bit cleared.
- */
-static __always_inline bool tdx_has_exit_reason(struct kvm_vcpu *vcpu)
-{
- u64 status = to_tdx(vcpu)->vp_enter_ret & TDX_SEAMCALL_STATUS_MASK;
-
- return status == TDX_SUCCESS || status == TDX_NON_RECOVERABLE_VCPU ||
- status == TDX_NON_RECOVERABLE_TD ||
- status == TDX_NON_RECOVERABLE_TD_NON_ACCESSIBLE ||
- status == TDX_NON_RECOVERABLE_TD_WRONG_APIC_MODE;
-}
-
-static __always_inline bool tdx_check_exit_reason(struct kvm_vcpu *vcpu, u16 reason)
-{
- return tdx_has_exit_reason(vcpu) &&
- (u16)tdexit_exit_reason(vcpu).basic == reason;
-}
-
-static __always_inline unsigned long tdexit_exit_qual(struct kvm_vcpu *vcpu)
-{
- return kvm_rcx_read(vcpu);
-}
-
-static __always_inline unsigned long tdexit_ext_exit_qual(struct kvm_vcpu *vcpu)
-{
- return kvm_rdx_read(vcpu);
-}
-
-static __always_inline unsigned long tdexit_gpa(struct kvm_vcpu *vcpu)
-{
- return kvm_r8_read(vcpu);
-}
-
static __always_inline unsigned long tdexit_intr_info(struct kvm_vcpu *vcpu)
{
return kvm_r9_read(vcpu);
@@ -246,23 +203,15 @@ BUILD_TDVMCALL_ACCESSORS(a1, r13);
BUILD_TDVMCALL_ACCESSORS(a2, r14);
BUILD_TDVMCALL_ACCESSORS(a3, r15);

-static __always_inline unsigned long tdvmcall_exit_type(struct kvm_vcpu *vcpu)
-{
- return kvm_r10_read(vcpu);
-}
-static __always_inline unsigned long tdvmcall_leaf(struct kvm_vcpu *vcpu)
-{
- return kvm_r11_read(vcpu);
-}
static __always_inline void tdvmcall_set_return_code(struct kvm_vcpu *vcpu,
long val)
{
- kvm_r10_write(vcpu, val);
+ ??? = val;
}
static __always_inline void tdvmcall_set_return_val(struct kvm_vcpu *vcpu,
unsigned long val)
{
- kvm_r11_write(vcpu, val);
+ ??? = val;
}

static inline void tdx_hkid_free(struct kvm_tdx *kvm_tdx)
@@ -742,11 +691,8 @@ bool tdx_interrupt_allowed(struct kvm_vcpu *vcpu)
* interrupt is always allowed unless TDX guest calls TDVMCALL with HLT,
* which passes the interrupt blocked flag.
*/
- if (!tdx_check_exit_reason(vcpu, EXIT_REASON_TDCALL) ||
- tdvmcall_exit_type(vcpu) || tdvmcall_leaf(vcpu) != EXIT_REASON_HLT)
- return true;
-
- return !tdvmcall_a0_read(vcpu);
+ return vmx_get_exit_reason(vcpu).basic != EXIT_REASON_HLT ||
+ <don't care where this resides>;
}

bool tdx_protected_apic_has_interrupt(struct kvm_vcpu *vcpu)
@@ -768,31 +714,30 @@ bool tdx_protected_apic_has_interrupt(struct kvm_vcpu *vcpu)
*/
void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
{
- struct vcpu_tdx *tdx = to_tdx(vcpu);
+ struct vcpu_vt *vt = to_vt(vcpu);

- if (!tdx->host_state_need_save)
+ if (vt->guest_state_loaded)
return;

if (likely(is_64bit_mm(current->mm)))
- tdx->msr_host_kernel_gs_base = current->thread.gsbase;
+ vt->msr_host_kernel_gs_base = current->thread.gsbase;
else
- tdx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);
+ vt->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);

- tdx->host_state_need_save = false;
+ vt->guest_state_loaded = true;
}

static void tdx_prepare_switch_to_host(struct kvm_vcpu *vcpu)
{
- struct vcpu_tdx *tdx = to_tdx(vcpu);
+ struct vcpu_vt *vt = to_vt(vcpu);

- tdx->host_state_need_save = true;
- if (!tdx->host_state_need_restore)
+ if (!vt->guest_state_loaded)
return;

++vcpu->stat.host_state_reload;

- wrmsrl(MSR_KERNEL_GS_BASE, tdx->msr_host_kernel_gs_base);
- tdx->host_state_need_restore = false;
+ wrmsrl(MSR_KERNEL_GS_BASE, vt->msr_host_kernel_gs_base);
+ vt->guest_state_loaded = false;
}

void tdx_vcpu_put(struct kvm_vcpu *vcpu)
@@ -897,57 +842,60 @@ static void tdx_restore_host_xsave_state(struct kvm_vcpu *vcpu)
write_pkru(vcpu->arch.host_pkru);
}

+static u32 tdx_to_vmx_exit_reason(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_tdx *tdx = to_tdx(vcpu);
+ u32 exit_reason;
+
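+ /*
+ * There is no simple bit to check whether TDH.VP.ENTER's return value
+ * carries a VMX exit reason; e.g. TDX_NON_RECOVERABLE_TD_WRONG_APIC_MODE
+ * has an exit reason despite the error bit being set, while
+ * TDX_NON_RECOVERABLE_TD_CORRUPTED_MD has none with the error bit clear.
+ */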
+ switch (tdx->vp_enter_ret & TDX_SEAMCALL_STATUS_MASK) {
+ case TDX_SUCCESS:
+ case TDX_NON_RECOVERABLE_VCPU:
+ case TDX_NON_RECOVERABLE_TD:
+ case TDX_NON_RECOVERABLE_TD_NON_ACCESSIBLE:
+ case TDX_NON_RECOVERABLE_TD_WRONG_APIC_MODE:
+ break;
+ default:
+ return -1u;
+ }
+
+ exit_reason = tdx->vp_enter_ret;
+ switch (exit_reason) {
+ case EXIT_REASON_TDCALL:
+ if (tdx->blah.tdvmcall_exit_type)
+ return EXIT_REASON_VMCALL;
+
+ if (tdx->blah.tdvmcall_leaf < 0x10000)
+ return tdx->blah.tdvmcall_leaf;
+ break;
+ case EXIT_REASON_EPT_MISCONFIG:
+ KVM_BUG_ON(1, vcpu->kvm);
+ return -1u;
+ default:
+ break;
+ }
+ return exit_reason;
+}
+
static void tdx_vcpu_enter_exit(struct kvm_vcpu *vcpu)
{
struct vcpu_tdx *tdx = to_tdx(vcpu);
+ struct vcpu_vt *vt = to_vt(vcpu);
struct tdx_module_args args;

guest_state_enter_irqoff();

- /*
- * TODO: optimization:
- * - Eliminate copy between args and vcpu->arch.regs.
- * - copyin/copyout registers only if (tdx->tdvmvall.regs_mask != 0)
- * which means TDG.VP.VMCALL.
- */
- args = (struct tdx_module_args) {
- .rcx = tdx->tdvpr_pa,
-#define REG(reg, REG) .reg = vcpu->arch.regs[VCPU_REGS_ ## REG]
- REG(rdx, RDX),
- REG(r8, R8),
- REG(r9, R9),
- REG(r10, R10),
- REG(r11, R11),
- REG(r12, R12),
- REG(r13, R13),
- REG(r14, R14),
- REG(r15, R15),
- REG(rbx, RBX),
- REG(rdi, RDI),
- REG(rsi, RSI),
-#undef REG
- };
-
tdx->vp_enter_ret = tdh_vp_enter(tdx->tdvpr_pa, &args);

-#define REG(reg, REG) vcpu->arch.regs[VCPU_REGS_ ## REG] = args.reg
- REG(rcx, RCX);
- REG(rdx, RDX);
- REG(r8, R8);
- REG(r9, R9);
- REG(r10, R10);
- REG(r11, R11);
- REG(r12, R12);
- REG(r13, R13);
- REG(r14, R14);
- REG(r15, R15);
- REG(rbx, RBX);
- REG(rdi, RDI);
- REG(rsi, RSI);
-#undef REG
+ vt->exit_reason.full = tdx_to_vmx_exit_reason(vcpu);

- if (tdx_check_exit_reason(vcpu, EXIT_REASON_EXCEPTION_NMI) &&
- is_nmi(tdexit_intr_info(vcpu)))
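+ /* Cache the exit info that TDH.VP.ENTER returns in GPRs. */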
+ vt->exit_qualification = args.rcx;
+ vt->ext_qualification = args.rdx;
+ vt->exit_intr_info = args.r9;
+ vt->exit_gpa = args.r8;
+
+ if (vt->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
+ is_nmi(vmx_get_intr_info(vcpu)))
__vmx_handle_nmi(vcpu);

guest_state_exit_irqoff();
@@ -971,11 +919,12 @@ static fastpath_t tdx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
{
struct vcpu_tdx *tdx = to_tdx(vcpu);
+ struct vcpu_vt *vt = to_vt(vcpu);

/* TDX exit handle takes care of this error case. */
if (unlikely(tdx->state != VCPU_TD_STATE_INITIALIZED)) {
- /* Set to avoid collision with EXIT_REASON_EXCEPTION_NMI. */
tdx->vp_enter_ret = TDX_SW_ERROR;
+ vt->exit_reason.full = -1u;
return EXIT_FASTPATH_NONE;
}

@@ -1005,7 +954,7 @@ fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)

trace_kvm_exit(vcpu, KVM_ISA_VMX);

- if (unlikely(tdx_has_exit_reason(vcpu) && tdexit_exit_reason(vcpu).failed_vmentry))
+ if (unlikely(vmx_get_exit_reason(vcpu).failed_vmentry))
return EXIT_FASTPATH_NONE;

tdx_complete_interrupts(vcpu);
@@ -1032,15 +981,14 @@ void tdx_inject_nmi(struct kvm_vcpu *vcpu)
void tdx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
{
- if (tdx_check_exit_reason(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT))
+ if (vmx_get_exit_reason(vcpu).basic == EXIT_REASON_EXTERNAL_INTERRUPT)
- vmx_handle_external_interrupt_irqoff(vcpu,
- tdexit_intr_info(vcpu));
+ vmx_handle_external_interrupt_irqoff(vcpu);
- else if (tdx_check_exit_reason(vcpu, EXIT_REASON_EXCEPTION_NMI))
+ else if (vmx_get_exit_reason(vcpu).basic == EXIT_REASON_EXCEPTION_NMI)
- vmx_handle_exception_irqoff(vcpu, tdexit_intr_info(vcpu));
+ vmx_handle_exception_irqoff(vcpu);
}

static int tdx_handle_exception_nmi(struct kvm_vcpu *vcpu)
{
- u32 intr_info = tdexit_intr_info(vcpu);
+ u32 intr_info = vmx_get_intr_info(vcpu);

/*
* Machine checks are handled by vmx_handle_exception_irqoff(), or by
@@ -1051,8 +999,7 @@ static int tdx_handle_exception_nmi(struct kvm_vcpu *vcpu)
return 1;

kvm_pr_unimpl("unexpected exception 0x%x(exit_reason 0x%llx qual 0x%lx)\n",
- intr_info,
- to_tdx(vcpu)->vp_enter_ret, tdexit_exit_qual(vcpu));
+ intr_info, to_tdx(vcpu)->vp_enter_ret, vmx_get_exit_qual(vcpu));

vcpu->run->exit_reason = KVM_EXIT_EXCEPTION;
vcpu->run->ex.exception = intr_info & INTR_INFO_VECTOR_MASK;
@@ -1063,21 +1010,12 @@ static int tdx_handle_exception_nmi(struct kvm_vcpu *vcpu)

-static int tdx_handle_external_interrupt(struct kvm_vcpu *vcpu)
-{
- ++vcpu->stat.irq_exits;
- return 1;
-}
-
-static int tdx_handle_triple_fault(struct kvm_vcpu *vcpu)
-{
- vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
- vcpu->mmio_needed = 0;
- return 0;
-}
-
-
static int complete_hypercall_exit(struct kvm_vcpu *vcpu)
{
- kvm_r10_write(vcpu, vcpu->run->hypercall.ret);
+ <tdx thingie> = kvm_rax_read(vcpu);
return 1;
}

@@ -1085,21 +1023,13 @@ static int tdx_emulate_vmcall(struct kvm_vcpu *vcpu)
{
- int r;
-
- /*
- * ABI for KVM tdvmcall argument:
- * In Guest-Hypervisor Communication Interface(GHCI) specification,
- * Non-zero leaf number (R10 != 0) is defined to indicate
- * vendor-specific. KVM uses this for KVM hypercall. NOTE: KVM
- * hypercall number starts from one. Zero isn't used for KVM hypercall
- * number.
- *
- * R10: KVM hypercall number
- * arguments: R11, R12, R13, R14.
- */
- r = __kvm_emulate_hypercall(vcpu, r10, r11, r12, r13, r14, true, 0,
- complete_hypercall_exit);
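+ /*
+ * Per the GHCI, the KVM hypercall number is in R10 and the arguments
+ * in R11-R14; shuffle them into the GPRs the common hypercall code
+ * expects.
+ */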
+ kvm_rax_write(vcpu, blah);
+ kvm_rbx_write(vcpu, blah);
+ kvm_rcx_write(vcpu, blah);
+ kvm_rdx_write(vcpu, blah);
+ kvm_rsi_write(vcpu, blah);

- return r > 0;
+ return kvm_emulate_hypercall(vcpu, complete_hypercall_exit);
}

/*
@@ -1258,36 +1188,9 @@ static int tdx_report_fatal_error(struct kvm_vcpu *vcpu)
return 0;
}

-static int tdx_emulate_cpuid(struct kvm_vcpu *vcpu)
-{
- u32 eax, ebx, ecx, edx;
-
- /* EAX and ECX for cpuid is stored in R12 and R13. */
- eax = tdvmcall_a0_read(vcpu);
- ecx = tdvmcall_a1_read(vcpu);
-
- kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx, false);
-
- tdvmcall_a0_write(vcpu, eax);
- tdvmcall_a1_write(vcpu, ebx);
- tdvmcall_a2_write(vcpu, ecx);
- tdvmcall_a3_write(vcpu, edx);
-
- tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_SUCCESS);
-
- return 1;
-}
-
-static int tdx_emulate_hlt(struct kvm_vcpu *vcpu)
-{
- tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_SUCCESS);
- return kvm_emulate_halt_noskip(vcpu);
-}
-
static int tdx_complete_pio_out(struct kvm_vcpu *vcpu)
{
vcpu->arch.pio.count = 0;
- tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_SUCCESS);
return 1;
}

@@ -1301,10 +1204,7 @@ static int tdx_complete_pio_in(struct kvm_vcpu *vcpu)
vcpu->arch.pio.port, &val, 1);

WARN_ON_ONCE(!ret);
-
- tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_SUCCESS);
tdvmcall_set_return_val(vcpu, val);
-
return 1;
}

@@ -1337,7 +1237,6 @@ static int tdx_emulate_io(struct kvm_vcpu *vcpu)
if (ret) {
if (!write)
tdvmcall_set_return_val(vcpu, val);
- tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_SUCCESS);
} else {
if (write)
vcpu->arch.complete_userspace_io = tdx_complete_pio_out;
@@ -1348,22 +1247,18 @@ static int tdx_emulate_io(struct kvm_vcpu *vcpu)
return ret;
}

-static int tdx_complete_mmio(struct kvm_vcpu *vcpu)
+static int tdx_complete_mmio_read(struct kvm_vcpu *vcpu)
{
unsigned long val = 0;
gpa_t gpa;
int size;

- if (!vcpu->mmio_is_write) {
- gpa = vcpu->mmio_fragments[0].gpa;
- size = vcpu->mmio_fragments[0].len;
+ gpa = vcpu->mmio_fragments[0].gpa;
+ size = vcpu->mmio_fragments[0].len;

- memcpy(&val, vcpu->run->mmio.data, size);
- tdvmcall_set_return_val(vcpu, val);
- trace_kvm_mmio(KVM_TRACE_MMIO_READ, size, gpa, &val);
- }
-
- tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_SUCCESS);
+ memcpy(&val, vcpu->run->mmio.data, size);
+ tdvmcall_set_return_val(vcpu, val);
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ, size, gpa, &val);
return 1;
}

@@ -1434,7 +1329,8 @@ static int tdx_emulate_mmio(struct kvm_vcpu *vcpu)

/* Request the device emulation to userspace device model. */
vcpu->mmio_is_write = write;
- vcpu->arch.complete_userspace_io = tdx_complete_mmio;
+ if (!write)
+ vcpu->arch.complete_userspace_io = tdx_complete_mmio_read;

vcpu->run->mmio.phys_addr = gpa;
vcpu->run->mmio.len = size;
@@ -1455,39 +1351,15 @@ static int tdx_emulate_mmio(struct kvm_vcpu *vcpu)
return 1;
}

-static int tdx_emulate_rdmsr(struct kvm_vcpu *vcpu)
+int tdx_complete_emulated_msr(struct kvm_vcpu *vcpu, int err)
{
- u32 index = tdvmcall_a0_read(vcpu);
- u64 data;
-
- if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ) ||
- kvm_get_msr(vcpu, index, &data)) {
- trace_kvm_msr_read_ex(index);
- tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND);
- return 1;
- }
- trace_kvm_msr_read(index, data);
-
- tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_SUCCESS);
- tdvmcall_set_return_val(vcpu, data);
- return 1;
-}
-
-static int tdx_emulate_wrmsr(struct kvm_vcpu *vcpu)
-{
- u32 index = tdvmcall_a0_read(vcpu);
- u64 data = tdvmcall_a1_read(vcpu);
-
- if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE) ||
- kvm_set_msr(vcpu, index, data)) {
- trace_kvm_msr_write_ex(index, data);
+ if (err) {
tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND);
return 1;
}

- trace_kvm_msr_write(index, data);
- tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_SUCCESS);
- return 1;
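+ /*
+ * For emulated RDMSR, the result lands in EDX:EAX; propagate it to
+ * the TDVMCALL output register.
+ */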
+ if (vmx_get_exit_reason(vcpu).basic == EXIT_REASON_MSR_READ)
+ tdvmcall_set_return_val(vcpu, kvm_read_edx_eax(vcpu));
+
+ return 1;
}

static int tdx_get_td_vm_call_info(struct kvm_vcpu *vcpu)
@@ -1506,26 +1378,11 @@ static int tdx_get_td_vm_call_info(struct kvm_vcpu *vcpu)

static int handle_tdvmcall(struct kvm_vcpu *vcpu)
{
- if (tdvmcall_exit_type(vcpu))
- return tdx_emulate_vmcall(vcpu);
-
- switch (tdvmcall_leaf(vcpu)) {
+ switch (to_tdx(vcpu)->blah.tdvmcall_leaf) {
case TDVMCALL_MAP_GPA:
return tdx_map_gpa(vcpu);
case TDVMCALL_REPORT_FATAL_ERROR:
return tdx_report_fatal_error(vcpu);
- case EXIT_REASON_CPUID:
- return tdx_emulate_cpuid(vcpu);
- case EXIT_REASON_HLT:
- return tdx_emulate_hlt(vcpu);
- case EXIT_REASON_IO_INSTRUCTION:
- return tdx_emulate_io(vcpu);
- case EXIT_REASON_EPT_VIOLATION:
- return tdx_emulate_mmio(vcpu);
- case EXIT_REASON_MSR_READ:
- return tdx_emulate_rdmsr(vcpu);
- case EXIT_REASON_MSR_WRITE:
- return tdx_emulate_wrmsr(vcpu);
case TDVMCALL_GET_TD_VM_CALL_INFO:
return tdx_get_td_vm_call_info(vcpu);
default:
@@ -1841,8 +1698,8 @@ void tdx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,

static inline bool tdx_is_sept_violation_unexpected_pending(struct kvm_vcpu *vcpu)
{
- u64 eeq_type = tdexit_ext_exit_qual(vcpu) & TDX_EXT_EXIT_QUAL_TYPE_MASK;
- u64 eq = tdexit_exit_qual(vcpu);
+ u64 eeq_type = vmx_get_ext_exit_qual(vcpu) & TDX_EXT_EXIT_QUAL_TYPE_MASK;
+ u64 eq = vmx_get_exit_qual(vcpu);

if (eeq_type != TDX_EXT_EXIT_QUAL_TYPE_PENDING_EPT_VIOLATION)
return false;
@@ -1852,7 +1709,7 @@ static inline bool tdx_is_sept_violation_unexpected_pending(struct kvm_vcpu *vcp

static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu)
{
- gpa_t gpa = tdexit_gpa(vcpu);
+ gpa_t gpa = vmx_get_exit_gpa(vcpu);
unsigned long exit_qual;

if (vt_is_tdx_private_gpa(vcpu->kvm, gpa)) {
@@ -1873,7 +1730,7 @@ static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu)
*/
exit_qual = EPT_VIOLATION_ACC_WRITE;
} else {
- exit_qual = tdexit_exit_qual(vcpu);
+ exit_qual = vmx_get_exit_qual(vcpu);
/*
* EPT violation due to instruction fetch should never be
* triggered from shared memory in TDX guest. If such EPT
@@ -1889,18 +1746,14 @@ static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu)

int tdx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t fastpath)
{
+ union vmx_exit_reason exit_reason = vmx_get_exit_reason(vcpu);
struct vcpu_tdx *tdx = to_tdx(vcpu);
u64 vp_enter_ret = tdx->vp_enter_ret;
- union vmx_exit_reason exit_reason;

if (fastpath != EXIT_FASTPATH_NONE)
return 1;

- /*
- * Handle TDX SW errors, including TDX_SEAMCALL_UD, TDX_SEAMCALL_GP and
- * TDX_SEAMCALL_VMFAILINVALID.
- */
- if (unlikely((vp_enter_ret & TDX_SW_ERROR) == TDX_SW_ERROR)) {
+ if (unlikely(exit_reason.full == -1u)) {
KVM_BUG_ON(!kvm_rebooting, vcpu->kvm);
goto unhandled_exit;
}
@@ -1909,33 +1762,47 @@ int tdx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t fastpath)
* Without off-TD debug enabled, failed_vmentry case must have
* TDX_NON_RECOVERABLE set.
*/
- if (unlikely(vp_enter_ret & (TDX_ERROR | TDX_NON_RECOVERABLE))) {
- /* Triple fault is non-recoverable. */
- if (unlikely(tdx_check_exit_reason(vcpu, EXIT_REASON_TRIPLE_FAULT)))
- return tdx_handle_triple_fault(vcpu);
-
+ if (unlikely(vp_enter_ret & (TDX_ERROR | TDX_NON_RECOVERABLE)) &&
+ exit_reason.basic != EXIT_REASON_TRIPLE_FAULT) {
kvm_pr_unimpl("TD vp_enter_ret 0x%llx, hkid 0x%x hkid pa 0x%llx\n",
vp_enter_ret, to_kvm_tdx(vcpu->kvm)->hkid,
set_hkid_to_hpa(0, to_kvm_tdx(vcpu->kvm)->hkid));
goto unhandled_exit;
}

- /* From now, the seamcall status should be TDX_SUCCESS. */
- WARN_ON_ONCE((vp_enter_ret & TDX_SEAMCALL_STATUS_MASK) != TDX_SUCCESS);
- exit_reason = tdexit_exit_reason(vcpu);
+ WARN_ON_ONCE(exit_reason.basic != EXIT_REASON_TRIPLE_FAULT &&
+ (vp_enter_ret & TDX_SEAMCALL_STATUS_MASK) != TDX_SUCCESS);

switch (exit_reason.basic) {
case EXIT_REASON_EXCEPTION_NMI:
return tdx_handle_exception_nmi(vcpu);
case EXIT_REASON_EXTERNAL_INTERRUPT:
- return tdx_handle_external_interrupt(vcpu);
+ ++vcpu->stat.irq_exits;
+ return 1;
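+ /*
+ * TDVMCALL leaves that encode a VMX exit reason are routed to the
+ * standard VMX-style handlers, faking up GPRs where the common code
+ * expects them.
+ */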
+ case EXIT_REASON_CPUID:
+ return tdx_emulate_cpuid(vcpu);
+ case EXIT_REASON_HLT:
+ return kvm_emulate_halt_noskip(vcpu);
+ case EXIT_REASON_VMCALL:
+ return tdx_emulate_vmcall(vcpu);
+ case EXIT_REASON_IO_INSTRUCTION:
+ return tdx_emulate_io(vcpu);
+ case EXIT_REASON_MSR_READ:
+ kvm_rcx_write(vcpu, <don't care where this comes from>);
+ return kvm_emulate_rdmsr(vcpu);
+ case EXIT_REASON_MSR_WRITE:
+ kvm_rcx_write(vcpu, <don't care where this comes from>);
+ return kvm_emulate_wrmsr(vcpu);
+ case EXIT_REASON_EPT_MISCONFIG:
+ return tdx_emulate_mmio(vcpu);
case EXIT_REASON_TDCALL:
return handle_tdvmcall(vcpu);
case EXIT_REASON_EPT_VIOLATION:
return tdx_handle_ept_violation(vcpu);
- case EXIT_REASON_EPT_MISCONFIG:
- KVM_BUG_ON(1, vcpu->kvm);
- return -EIO;
+ case EXIT_REASON_TRIPLE_FAULT:
+ vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
+ vcpu->mmio_needed = 0;
+ return 0;
case EXIT_REASON_OTHER_SMI:
/*
* Unlike VMX, SMI in SEAM non-root mode (i.e. when
@@ -1970,20 +1837,20 @@ int tdx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t fastpath)
return 0;
}

-void tdx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
- u64 *info1, u64 *info2, u32 *intr_info, u32 *error_code)
+void tdx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason, u64 *info1,
+ u64 *info2, u32 *intr_info, u32 *error_code)
{
struct vcpu_tdx *tdx = to_tdx(vcpu);

- if (tdx_has_exit_reason(vcpu)) {
+ if (vmx_get_exit_reason(vcpu).full != -1u) {
/*
* Encode some useful info from the the 64 bit return code
* into the 32 bit exit 'reason'. If the VMX exit reason is
* valid, just set it to those bits.
*/
*reason = (u32)tdx->vp_enter_ret;
- *info1 = tdexit_exit_qual(vcpu);
- *info2 = tdexit_ext_exit_qual(vcpu);
+ *info1 = vmx_get_exit_qual(vcpu);
+ *info2 = vmx_get_ext_exit_qual(vcpu);
} else {
/*
* When the VMX exit reason in vp_enter_ret is not valid,
@@ -1997,7 +1864,7 @@ void tdx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
*info2 = 0;
}

- *intr_info = tdexit_intr_info(vcpu);
+ *intr_info = vmx_get_intr_info(vcpu);
*error_code = 0;
}

diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h
index 0833d1084331..33d316e81a7e 100644
--- a/arch/x86/kvm/vmx/tdx.h
+++ b/arch/x86/kvm/vmx/tdx.h
@@ -59,12 +59,7 @@ enum vcpu_tdx_state {
struct vcpu_tdx {
struct kvm_vcpu vcpu;

- /* Posted interrupt descriptor */
- struct pi_desc pi_desc;
-
- /* Used if this vCPU is waiting for PI notification wakeup. */
- struct list_head pi_wakeup_list;
- /* Until here same layout to struct vcpu_pi. */
+ struct vcpu_vt vt;

unsigned long tdvpr_pa;
unsigned long *tdcx_pa;
@@ -75,10 +70,6 @@ struct vcpu_tdx {

enum vcpu_tdx_state state;

- bool host_state_need_save;
- bool host_state_need_restore;
- u64 msr_host_kernel_gs_base;
-
u64 map_gpa_next;
u64 map_gpa_end;
};
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 832387bea753..8302e429c82a 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6099,9 +6099,9 @@ void vmx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
{
struct vcpu_vmx *vmx = to_vmx(vcpu);

- *reason = vmx->exit_reason.full;
+ *reason = vmx_get_exit_reason(vcpu).full;
*info1 = vmx_get_exit_qual(vcpu);
- if (!(vmx->exit_reason.failed_vmentry)) {
+ if (!(vmx_get_exit_reason(vcpu).failed_vmentry)) {
*info2 = vmx->idt_vectoring_info;
*intr_info = vmx_get_intr_info(vcpu);
if (is_exception_with_error_code(*intr_info))
@@ -6380,7 +6380,7 @@ void dump_vmcs(struct kvm_vcpu *vcpu)
static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- union vmx_exit_reason exit_reason = vmx->exit_reason;
+ union vmx_exit_reason exit_reason = vmx_get_exit_reason(vcpu);
u32 vectoring_info = vmx->idt_vectoring_info;
u16 exit_handler_index;

@@ -6901,11 +6901,10 @@ void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
if (vmx->emulation_required)
return;

- if (vmx->exit_reason.basic == EXIT_REASON_EXTERNAL_INTERRUPT)
- vmx_handle_external_interrupt_irqoff(vcpu,
- vmx_get_intr_info(vcpu));
- else if (vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI)
- vmx_handle_exception_irqoff(vcpu, vmx_get_intr_info(vcpu));
+ if (vmx_get_exit_reason(vcpu).basic == EXIT_REASON_EXTERNAL_INTERRUPT)
+ vmx_handle_external_interrupt_irqoff(vcpu);
+ else if (vmx_get_exit_reason(vcpu).basic == EXIT_REASON_EXCEPTION_NMI)
+ vmx_handle_exception_irqoff(vcpu);
}

/*
@@ -7154,6 +7153,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
unsigned int flags)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct vcpu_vt *vt = to_vt(vcpu);

guest_state_enter_irqoff();

@@ -7185,15 +7185,15 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
vmx_enable_fb_clear(vmx);

if (unlikely(vmx->fail)) {
- vmx->exit_reason.full = 0xdead;
+ vt->exit_reason.full = 0xdead;
goto out;
}

- vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
- if (likely(!vmx->exit_reason.failed_vmentry))
+ vt->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
+ if (likely(!vt->exit_reason.failed_vmentry))
vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);

- if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
+ if ((u16)vt->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
is_nmi(vmx_get_intr_info(vcpu)))
__vmx_handle_nmi(vcpu);

@@ -7331,7 +7331,7 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
* checking.
*/
if (vmx->nested.nested_run_pending &&
- !vmx->exit_reason.failed_vmentry)
+ !vmx_get_exit_reason(vcpu).failed_vmentry)
++vcpu->stat.nested_run;

vmx->nested.nested_run_pending = 0;
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index a91e1610b0b7..7a385dcdb2d5 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -231,28 +231,11 @@ struct nested_vmx {
struct vcpu_vmx {
struct kvm_vcpu vcpu;

- /* Posted interrupt descriptor */
- struct pi_desc pi_desc;
-
- /* Used if this vCPU is waiting for PI notification wakeup. */
- struct list_head pi_wakeup_list;
- /* Until here same layout to struct vcpu_pi. */
+ struct vcpu_vt vt;

u8 fail;
u8 x2apic_msr_bitmap_mode;

- /*
- * If true, host state has been stored in vmx->loaded_vmcs for
- * the CPU registers that only need to be switched when transitioning
- * to/from the kernel, and the registers have been loaded with guest
- * values. If false, host state is loaded in the CPU registers
- * and vmx->loaded_vmcs->host_state is invalid.
- */
- bool guest_state_loaded;
-
- unsigned long exit_qualification;
- u32 exit_intr_info;
- u32 idt_vectoring_info;
ulong rflags;

/*
@@ -263,11 +246,10 @@ struct vcpu_vmx {
*/
struct vmx_uret_msr guest_uret_msrs[MAX_NR_USER_RETURN_MSRS];
bool guest_uret_msrs_loaded;
+
#ifdef CONFIG_X86_64
- u64 msr_host_kernel_gs_base;
u64 msr_guest_kernel_gs_base;
#endif
-
u64 spec_ctrl;
u32 msr_ia32_umwait_control;

@@ -649,26 +631,6 @@ void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu);
int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu);
void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu);

-static __always_inline unsigned long vmx_get_exit_qual(struct kvm_vcpu *vcpu)
-{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
-
- if (!kvm_register_test_and_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_1))
- vmx->exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
-
- return vmx->exit_qualification;
-}
-
-static __always_inline u32 vmx_get_intr_info(struct kvm_vcpu *vcpu)
-{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
-
- if (!kvm_register_test_and_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_2))
- vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
-
- return vmx->exit_intr_info;
-}
-
struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags);
void free_vmcs(struct vmcs *vmcs);
int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs);

base-commit: 14cfaed7621d53af608fd96aa36188064937ca44
--
2.47.1.613.gc27f4b7a9f-goog

