[RFC PATCH 06/73] KVM: x86: Move VMX interrupt/nmi handling into kvm.ko

From: Lai Jiangshan
Date: Mon Feb 26 2024 - 09:38:01 EST


From: Lai Jiangshan <jiangshan.ljs@xxxxxxxxxxxx>

Similar to VMX, hardware interrupts/NMIs that arrive while a PVM guest
is running trigger a VM exit and should be handled by the host
interrupt/NMI handlers. Therefore, move the VMX interrupt/NMI handling
into kvm.ko so that it can be shared.
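With these helpers exported from kvm.ko, a vendor module's irqoff exit
path only needs to call into the common code. As a rough sketch (not
part of this patch; pvm_handle_event_irqoff() and the PVM-side caching
of the host IDT base are hypothetical):

	/*
	 * Dispatch a host NMI or external interrupt that triggered a
	 * VM exit.  The kvm_do_*_irqoff() helpers bracket the handler
	 * call with kvm_before_interrupt()/kvm_after_interrupt().
	 */
	static void pvm_handle_event_irqoff(struct kvm_vcpu *vcpu, u8 vector)
	{
		if (vector == NMI_VECTOR) {
			kvm_do_nmi_irqoff(vcpu);
		} else {
			/* host_idt_base: cached host IDT address */
			gate_desc *desc = (gate_desc *)host_idt_base + vector;

			kvm_do_interrupt_irqoff(vcpu, gate_offset(desc));
		}
	}

This mirrors what handle_external_interrupt_irqoff() and
vmx_vcpu_enter_exit() in vmx.c do after this patch.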

Signed-off-by: Lai Jiangshan <jiangshan.ljs@xxxxxxxxxxxx>
Co-developed-by: Hou Wenlong <houwenlong.hwl@xxxxxxxxxxxx>
Signed-off-by: Hou Wenlong <houwenlong.hwl@xxxxxxxxxxxx>
---
arch/x86/include/asm/idtentry.h | 12 ++++----
arch/x86/kernel/nmi.c           |  8 +++---
arch/x86/kvm/Makefile           |  2 +-
arch/x86/kvm/host_entry.S       | 50 +++++++++++++++++++++++++++++++++
arch/x86/kvm/vmx/vmenter.S      | 43 ----------------------------
arch/x86/kvm/vmx/vmx.c          | 14 ++-------
arch/x86/kvm/x86.c              |  3 ++
arch/x86/kvm/x86.h              | 18 ++++++++++++
8 files changed, 85 insertions(+), 65 deletions(-)
create mode 100644 arch/x86/kvm/host_entry.S

diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index 13639e57e1f8..8aab0b50431a 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -586,14 +586,14 @@ DECLARE_IDTENTRY_RAW(X86_TRAP_MC, xenpv_exc_machine_check);

/* NMI */

-#if IS_ENABLED(CONFIG_KVM_INTEL)
+#if IS_ENABLED(CONFIG_KVM)
/*
- * Special entry point for VMX which invokes this on the kernel stack, even for
- * 64-bit, i.e. without using an IST. asm_exc_nmi() requires an IST to work
- * correctly vs. the NMI 'executing' marker. Used for 32-bit kernels as well
- * to avoid more ifdeffery.
+ * Special entry point for VMX/PVM which invokes this on the kernel stack, even
+ * for 64-bit, i.e. without using an IST. asm_exc_nmi() requires an IST to
+ * work correctly vs. the NMI 'executing' marker. Used for 32-bit kernels as
+ * well to avoid more ifdeffery.
*/
-DECLARE_IDTENTRY(X86_TRAP_NMI, exc_nmi_kvm_vmx);
+DECLARE_IDTENTRY(X86_TRAP_NMI, exc_nmi_kvm);
#endif

DECLARE_IDTENTRY_NMI(X86_TRAP_NMI, exc_nmi);
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 17e955ab69fe..265e6b38cc58 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -568,13 +568,13 @@ DEFINE_IDTENTRY_RAW(exc_nmi)
mds_user_clear_cpu_buffers();
}

-#if IS_ENABLED(CONFIG_KVM_INTEL)
-DEFINE_IDTENTRY_RAW(exc_nmi_kvm_vmx)
+#if IS_ENABLED(CONFIG_KVM)
+DEFINE_IDTENTRY_RAW(exc_nmi_kvm)
{
exc_nmi(regs);
}
-#if IS_MODULE(CONFIG_KVM_INTEL)
-EXPORT_SYMBOL_GPL(asm_exc_nmi_kvm_vmx);
+#if IS_MODULE(CONFIG_KVM)
+EXPORT_SYMBOL_GPL(asm_exc_nmi_kvm);
#endif
#endif

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 80e3fe184d17..97bad203b1b1 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -9,7 +9,7 @@ endif

include $(srctree)/virt/kvm/Makefile.kvm

-kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \
+kvm-y += x86.o emulate.o i8259.o irq.o lapic.o host_entry.o \
         i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
         hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o \
         mmu/spte.o
diff --git a/arch/x86/kvm/host_entry.S b/arch/x86/kvm/host_entry.S
new file mode 100644
index 000000000000..6bdf0df06eb0
--- /dev/null
+++ b/arch/x86/kvm/host_entry.S
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/asm.h>
+#include <asm/nospec-branch.h>
+#include <asm/segment.h>
+
+.macro KVM_DO_EVENT_IRQOFF call_insn call_target
+ /*
+ * Unconditionally create a stack frame, getting the correct RSP on the
+ * stack (for x86-64) would take two instructions anyways, and RBP can
+ * be used to restore RSP to make objtool happy (see below).
+ */
+ push %_ASM_BP
+ mov %_ASM_SP, %_ASM_BP
+
+#ifdef CONFIG_X86_64
+ /*
+ * Align RSP to a 16-byte boundary (to emulate CPU behavior) before
+ * creating the synthetic interrupt stack frame for the IRQ/NMI.
+ */
+ and $-16, %rsp
+ push $__KERNEL_DS
+ push %rbp
+#endif
+ pushf
+ push $__KERNEL_CS
+ \call_insn \call_target
+
+ /*
+ * "Restore" RSP from RBP, even though IRET has already unwound RSP to
+ * the correct value. objtool doesn't know the callee will IRET and,
+ * without the explicit restore, thinks the stack is getting walloped.
+ * Using an unwind hint is problematic due to x86-64's dynamic alignment.
+ */
+ mov %_ASM_BP, %_ASM_SP
+ pop %_ASM_BP
+ RET
+.endm
+
+.section .noinstr.text, "ax"
+
+SYM_FUNC_START(kvm_do_host_nmi_irqoff)
+ KVM_DO_EVENT_IRQOFF call asm_exc_nmi_kvm
+SYM_FUNC_END(kvm_do_host_nmi_irqoff)
+
+.section .text, "ax"
+
+SYM_FUNC_START(kvm_do_host_interrupt_irqoff)
+ KVM_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1
+SYM_FUNC_END(kvm_do_host_interrupt_irqoff)
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 906ecd001511..12b7b99a9dd8 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -31,39 +31,6 @@
#define VCPU_R15 __VCPU_REGS_R15 * WORD_SIZE
#endif

-.macro VMX_DO_EVENT_IRQOFF call_insn call_target
- /*
- * Unconditionally create a stack frame, getting the correct RSP on the
- * stack (for x86-64) would take two instructions anyways, and RBP can
- * be used to restore RSP to make objtool happy (see below).
- */
- push %_ASM_BP
- mov %_ASM_SP, %_ASM_BP
-
-#ifdef CONFIG_X86_64
- /*
- * Align RSP to a 16-byte boundary (to emulate CPU behavior) before
- * creating the synthetic interrupt stack frame for the IRQ/NMI.
- */
- and $-16, %rsp
- push $__KERNEL_DS
- push %rbp
-#endif
- pushf
- push $__KERNEL_CS
- \call_insn \call_target
-
- /*
- * "Restore" RSP from RBP, even though IRET has already unwound RSP to
- * the correct value. objtool doesn't know the callee will IRET and,
- * without the explicit restore, thinks the stack is getting walloped.
- * Using an unwind hint is problematic due to x86-64's dynamic alignment.
- */
- mov %_ASM_BP, %_ASM_SP
- pop %_ASM_BP
- RET
-.endm
-
.section .noinstr.text, "ax"

/**
@@ -299,10 +266,6 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL)

SYM_FUNC_END(__vmx_vcpu_run)

-SYM_FUNC_START(vmx_do_nmi_irqoff)
- VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
-SYM_FUNC_END(vmx_do_nmi_irqoff)
-
#ifndef CONFIG_CC_HAS_ASM_GOTO_OUTPUT

/**
@@ -354,9 +317,3 @@ SYM_FUNC_START(vmread_error_trampoline)
RET
SYM_FUNC_END(vmread_error_trampoline)
#endif
-
-.section .text, "ax"
-
-SYM_FUNC_START(vmx_do_interrupt_irqoff)
- VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1
-SYM_FUNC_END(vmx_do_interrupt_irqoff)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index be20a60047b1..fca47304506e 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6920,9 +6920,6 @@ static void vmx_apicv_pre_state_restore(struct kvm_vcpu *vcpu)
memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir));
}

-void vmx_do_interrupt_irqoff(unsigned long entry);
-void vmx_do_nmi_irqoff(void);
-
static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
{
/*
@@ -6968,9 +6965,7 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
"unexpected VM-Exit interrupt info: 0x%x", intr_info))
return;

- kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ);
- vmx_do_interrupt_irqoff(gate_offset(desc));
- kvm_after_interrupt(vcpu);
+ kvm_do_interrupt_irqoff(vcpu, gate_offset(desc));

vcpu->arch.at_instruction_boundary = true;
}
@@ -7260,11 +7255,8 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);

if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
- is_nmi(vmx_get_intr_info(vcpu))) {
- kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
- vmx_do_nmi_irqoff();
- kvm_after_interrupt(vcpu);
- }
+ is_nmi(vmx_get_intr_info(vcpu)))
+ kvm_do_nmi_irqoff(vcpu);

out:
guest_state_exit_irqoff();
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 35ad6dd5eaf6..96f3913f7fc5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -13784,6 +13784,9 @@ int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size,
}
EXPORT_SYMBOL_GPL(kvm_sev_es_string_io);

+EXPORT_SYMBOL_GPL(kvm_do_host_nmi_irqoff);
+EXPORT_SYMBOL_GPL(kvm_do_host_interrupt_irqoff);
+
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_entry);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 5184fde1dc54..4d1430f8874b 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -491,6 +491,24 @@ static inline void kvm_machine_check(void)
#endif
}

+void kvm_do_host_nmi_irqoff(void);
+void kvm_do_host_interrupt_irqoff(unsigned long entry);
+
+static __always_inline void kvm_do_nmi_irqoff(struct kvm_vcpu *vcpu)
+{
+ kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
+ kvm_do_host_nmi_irqoff();
+ kvm_after_interrupt(vcpu);
+}
+
+static inline void kvm_do_interrupt_irqoff(struct kvm_vcpu *vcpu,
+ unsigned long entry)
+{
+ kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ);
+ kvm_do_host_interrupt_irqoff(entry);
+ kvm_after_interrupt(vcpu);
+}
+
void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu);
void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu);
int kvm_spec_ctrl_test_value(u64 value);
--
2.19.1.6.gb485710b