Move VMX's handling of NMI VM-Exits into vmx_vcpu_enter_exit() so that
the NMI is handled prior to leaving the safety of noinstr. Handling the
NMI after leaving noinstr exposes the kernel to potential ordering
problems as an instrumentation-induced fault, e.g. #DB, #BP, #PF, etc.
will unblock NMIs when IRETing back to the faulting instruction.
Reported-by: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---
arch/x86/kvm/vmx/vmcs.h | 4 ++--
arch/x86/kvm/vmx/vmenter.S | 8 ++++----
arch/x86/kvm/vmx/vmx.c | 34 +++++++++++++++++++++-------------
arch/x86/kvm/x86.h | 6 +++---
4 files changed, 30 insertions(+), 22 deletions(-)
diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h
index ac290a44a693..7c1996b433e2 100644
--- a/arch/x86/kvm/vmx/vmcs.h
+++ b/arch/x86/kvm/vmx/vmcs.h
@@ -75,7 +75,7 @@ struct loaded_vmcs {
struct vmcs_controls_shadow controls_shadow;
};
-static inline bool is_intr_type(u32 intr_info, u32 type)
+static __always_inline bool is_intr_type(u32 intr_info, u32 type)
{
const u32 mask = INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK;
@@ -146,7 +146,7 @@ static inline bool is_icebp(u32 intr_info)
return is_intr_type(intr_info, INTR_TYPE_PRIV_SW_EXCEPTION);
}
-static inline bool is_nmi(u32 intr_info)
+static __always_inline bool is_nmi(u32 intr_info)
{
return is_intr_type(intr_info, INTR_TYPE_NMI_INTR);
}
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 9d987e7e48c4..059243085211 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -299,6 +299,10 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
SYM_FUNC_END(__vmx_vcpu_run)
+SYM_FUNC_START(vmx_do_nmi_irqoff)
+ VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
+SYM_FUNC_END(vmx_do_nmi_irqoff)
+
.section .text, "ax"
@@ -353,10 +357,6 @@ SYM_FUNC_START(vmread_error_trampoline)
SYM_FUNC_END(vmread_error_trampoline)
#endif
-SYM_FUNC_START(vmx_do_nmi_irqoff)
- VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
-SYM_FUNC_END(vmx_do_nmi_irqoff)
-
SYM_FUNC_START(vmx_do_interrupt_irqoff)
VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1
SYM_FUNC_END(vmx_do_interrupt_irqoff)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index c242e2591896..b03020ca1840 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5095,8 +5095,13 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
vect_info = vmx->idt_vectoring_info;
intr_info = vmx_get_intr_info(vcpu);
+ /*
+ * Machine checks are handled by handle_exception_irqoff(), or by
+ * vmx_vcpu_run() if a #MC occurs on VM-Entry. NMIs are handled by
+ * vmx_vcpu_enter_exit().
+ */
if (is_machine_check(intr_info) || is_nmi(intr_info))
- return 1; /* handled by handle_exception_nmi_irqoff() */
+ return 1;
/*
* Queue the exception here instead of in handle_nm_fault_irqoff().
@@ -6809,7 +6814,7 @@ static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
rdmsrl(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err);
}
-static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
+static void handle_exception_irqoff(struct vcpu_vmx *vmx)
{
u32 intr_info = vmx_get_intr_info(&vmx->vcpu);
@@ -6822,12 +6827,6 @@ static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
/* Handle machine checks before interrupts are enabled */
else if (is_machine_check(intr_info))
kvm_machine_check();
- /* We need to handle NMIs before interrupts are enabled */
- else if (is_nmi(intr_info)) {
- kvm_before_interrupt(&vmx->vcpu, KVM_HANDLING_NMI);
- vmx_do_nmi_irqoff();
- kvm_after_interrupt(&vmx->vcpu);
- }
}
static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
@@ -6857,7 +6856,7 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
if (vmx->exit_reason.basic == EXIT_REASON_EXTERNAL_INTERRUPT)
handle_external_interrupt_irqoff(vcpu);
else if (vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI)
- handle_exception_nmi_irqoff(vmx);
+ handle_exception_irqoff(vmx);
}
/*
@@ -7119,6 +7118,18 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
vmx_enable_fb_clear(vmx);
+ if (unlikely(vmx->fail))
+ vmx->exit_reason.full = 0xdead;
+ else
+ vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
+
+ if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
+ is_nmi(vmx_get_intr_info(vcpu))) {
+ kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
+ vmx_do_nmi_irqoff();
+ kvm_after_interrupt(vcpu);
+ }
+
guest_state_exit_irqoff();
}
@@ -7260,12 +7271,9 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmx->idt_vectoring_info = 0;
- if (unlikely(vmx->fail)) {
- vmx->exit_reason.full = 0xdead;
+ if (unlikely(vmx->fail))
return EXIT_FASTPATH_NONE;
- }
- vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
if (unlikely((u16)vmx->exit_reason.basic == EXIT_REASON_MCE_DURING_VMENTRY))
kvm_machine_check();
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 9de72586f406..44d1827f0a30 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -382,13 +382,13 @@ enum kvm_intr_type {
KVM_HANDLING_NMI,
};
-static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu,
- enum kvm_intr_type intr)
+static __always_inline void kvm_before_interrupt(struct kvm_vcpu *vcpu,
+ enum kvm_intr_type intr)
{
WRITE_ONCE(vcpu->arch.handling_intr_from_guest, (u8)intr);
}
-static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
+static __always_inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
{
WRITE_ONCE(vcpu->arch.handling_intr_from_guest, 0);
}