RE: [PATCH v5 34/34] KVM: x86/vmx: execute "int $2" to handle NMI in NMI caused VM exits when FRED is enabled

From: Li, Xin3
Date: Fri Mar 24 2023 - 13:45:50 EST


> > I'm not dead set against the proposed approach, but IMO it's not
> > obviously better than a bit of assembly to have a more direct call into the NMI
> handler.
>
> I will give it a shot.

Hi Sean,

I got a working patch, before I resend the whole FRED patch set again, can
you please check if this is what you're expecting?

When FRED is enabled, the x86 CPU always pushes an error code on the stack
immediately after the return instruction address is pushed. To generate such
a stack frame, call a trampoline function first to push the return instruction
address on the stack, and the trampoline function then pushes an error code
(0 for IRQ/NMI) and jump to fred_entrypoint_kernel.

I could have vmx_do_interrupt_trampoline jump to fred_entrypoint_kernel
Instead of calling external_interrupt(), but that would reenter the noinstr
text again (not a big problem but seems not preferred by Peter Z).

Thanks!
Xin

diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 631fd7da2bc3..6682b5bd202b 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -31,7 +31,7 @@
#define VCPU_R15 __VCPU_REGS_R15 * WORD_SIZE
#endif

-.macro VMX_DO_EVENT_IRQOFF call_insn call_target
+.macro VMX_DO_EVENT_IRQOFF call_insn call_target fred=1 nmi=0
/*
* Unconditionally create a stack frame, getting the correct RSP on the
* stack (for x86-64) would take two instructions anyways, and RBP can
@@ -46,11 +46,34 @@
* creating the synthetic interrupt stack frame for the IRQ/NMI.
*/
and $-16, %rsp
+
+ .if \fred
+ push $0 /* Reserved by FRED, must be 0 */
+ push $0 /* FRED event data, 0 for NMI and external interrupts */
+
+ .if \nmi
+ mov $(2 << 32 | 2 << 48), %_ASM_AX /* NMI event type and vector */
+ .else
+ mov %_ASM_ARG1, %_ASM_AX
+ shl $32, %_ASM_AX /* external interrupt vector */
+ .endif
+ add $__KERNEL_DS, %_ASM_AX
+ bts $57, %_ASM_AX /* bit 57: 64-bit mode */
+ push %_ASM_AX
+ .else
push $__KERNEL_DS
+ .endif
+
push %rbp
#endif
pushf
+ .if \nmi
+ mov $__KERNEL_CS, %_ASM_AX
+ bts $28, %_ASM_AX /* set the NMI bit */
+ push %_ASM_AX
+ .else
push $__KERNEL_CS
+ .endif
\call_insn \call_target

/*
@@ -299,8 +322,19 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)

SYM_FUNC_END(__vmx_vcpu_run)

+SYM_FUNC_START(vmx_do_nmi_trampoline)
+#ifdef CONFIG_X86_FRED
+ ALTERNATIVE "jmp .Lno_errorcode_push", "", X86_FEATURE_FRED
+ push $0 /* FRED error code, 0 for NMI */
+ jmp fred_entrypoint_kernel
+#endif
+
+.Lno_errorcode_push:
+ jmp asm_exc_nmi_kvm_vmx
+SYM_FUNC_END(vmx_do_nmi_trampoline)
+
SYM_FUNC_START(vmx_do_nmi_irqoff)
- VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
+ VMX_DO_EVENT_IRQOFF call vmx_do_nmi_trampoline nmi=1
SYM_FUNC_END(vmx_do_nmi_irqoff)


@@ -358,5 +392,51 @@ SYM_FUNC_END(vmread_error_trampoline)
#endif

SYM_FUNC_START(vmx_do_interrupt_irqoff)
- VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1
+ VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1 fred=0
SYM_FUNC_END(vmx_do_interrupt_irqoff)
+
+#ifdef CONFIG_X86_64
+SYM_FUNC_START(vmx_do_interrupt_trampoline)
+ push $0 /* FRED error code, 0 for NMI and external interrupts */
+ push %rdi
+ push %rsi
+ push %rdx
+ push %rcx
+ push %rax
+ push %r8
+ push %r9
+ push %r10
+ push %r11
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+
+ movq %rsp, %rdi /* %rdi -> pt_regs */
+ call external_interrupt
+
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+ pop %r11
+ pop %r10
+ pop %r9
+ pop %r8
+ pop %rax
+ pop %rcx
+ pop %rdx
+ pop %rsi
+ pop %rdi
+ addq $8,%rsp /* Drop FRED error code */
+ RET
+SYM_FUNC_END(vmx_do_interrupt_trampoline)
+
+SYM_FUNC_START(vmx_do_fred_interrupt_irqoff)
+ VMX_DO_EVENT_IRQOFF call vmx_do_interrupt_trampoline
+SYM_FUNC_END(vmx_do_fred_interrupt_irqoff)
+#endif
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index d2d6e1b6c788..5addfee5cc6d 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6875,6 +6875,7 @@ static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
}

void vmx_do_interrupt_irqoff(unsigned long entry);
+void vmx_do_fred_interrupt_irqoff(unsigned int vector);
void vmx_do_nmi_irqoff(void);

static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
@@ -6923,7 +6924,12 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
return;

kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ);
- vmx_do_interrupt_irqoff(gate_offset(desc));
+#ifdef CONFIG_X86_64
+ if (cpu_feature_enabled(X86_FEATURE_FRED))
+ vmx_do_fred_interrupt_irqoff(vector);
+ else
+#endif
+ vmx_do_interrupt_irqoff(gate_offset(desc));
kvm_after_interrupt(vcpu);

vcpu->arch.at_instruction_boundary = true;