[PATCH] KVM: VMX: Remove .fixup usage in VMREAD fault path

From: Sean Christopherson
Date: Fri Nov 05 2021 - 13:40:21 EST


Rework the VMREAD magic to eliminate usage of custom .fixup as a step
toward eliminating .fixup throughout the kernel. Because the compiler
can use the same register for both the input and output, e.g. generate
"VMREAD %rax, %rax", hijacking the output register to store the "fault"
flag is not an option. To avoid yet another flavor of fixup, overload
"field" to treat -EFAULT as a magic value.

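As a minimal illustration of the aliasing problem, consider a bare-bones
reader (a hypothetical sketch, not code from this patch): nothing in the
constraints prevents the compiler from assigning @field and the result
to the same register.

/*
 * Hypothetical sketch only, NOT the patched code: with independent "=r"
 * and "r" constraints, the compiler may allocate one register for both
 * operands and emit "vmread %rax, %rax".  A fixup that stuffed a fault
 * flag into the output register could thus also wipe out the @field
 * value needed to report which VMREAD failed.
 */
static inline unsigned long vmcs_readl_sketch(unsigned long field)
{
	unsigned long value;

	asm volatile("vmread %1, %0" : "=r"(value) : "r"(field) : "cc");
	return value;
}
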
Because -EFAULT is (obviously) a negative value, collision with a VMCS
field encoding is impossible on 64-bit kernels due to the encodings being
32-bit values. And for 32-bit kernels, no known VMCS field can collide
with the magic value because despite field encodings being 32-bit values,
bits 31:15 are currently reserved and must be zero. These shenanigans
can be revisited in the extremely unlikely event that a future CPU
supports a "negative" VMCS encoding. And hopefully this entire mess will
go away before that happens, as the trampoline shenanigans are needed
only because the minimum compiler version doesn't guarantee support for
asm goto with outputs.
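
To make the "no collision" argument concrete, a tiny illustrative
userspace demo (again, not part of the patch) shows the bit patterns in
play; nonzero bits 31:15 rule out every architecturally valid encoding:

#include <stdio.h>

#define EFAULT	14	/* same numeric value as the kernel's errno */

int main(void)
{
	/* 64-bit kernel: the magic value can't fit in a 32-bit encoding. */
	unsigned long long magic64 = (unsigned long long)-EFAULT;

	/* 32-bit kernel: the reserved-zero bits 31:15 are all set. */
	unsigned int magic32 = (unsigned int)-EFAULT;

	printf("64-bit magic = %#llx\n", magic64);	/* 0xfffffffffffffff2 */
	printf("32-bit magic = %#x, bits 31:15 = %#x\n",
	       magic32, magic32 >> 15);			/* 0xfffffff2, 0x1ffff */
	return 0;
}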

Alternatively, the fault information could be encoded in EFLAGS since
VM-Fail sets arithmetic flags to well-known values. However, that would
require yet another flavor of fixup and would complicate the already ugly
asm trampoline.
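
For reference, per the SDM the VMX status conventions are: VMfailInvalid
sets CF, VMfailValid sets ZF, and VMsucceed clears all arithmetic flags,
which is why the existing asm can use "ja" (taken iff CF=0 && ZF=0) for
the happy path.  A hedged sketch of pure EFLAGS-based VM-Fail detection
(hypothetical, not taken from this patch):

#include <stdbool.h>

/*
 * Hypothetical sketch only: "setbe" is 1 iff CF=1 || ZF=1, i.e. iff the
 * VMREAD VM-Failed.  A #GP/#PF on VMREAD never reaches the "setbe",
 * which is why signaling faults through EFLAGS would require a fixup
 * that forges a flag combination VM-Fail can't produce.
 */
static inline bool vmread_vmfailed(unsigned long field, unsigned long *value)
{
	bool fail;

	asm volatile("vmread %2, %1\n\t"
		     "setbe %0"
		     : "=qm"(fail), "=rm"(*value)
		     : "r"(field)
		     : "cc");
	return fail;
}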

Forcing different output registers is undesirable as doing so would
prevent the compiler from fully optimizing the happy path, which is what
KVM really cares about since faults and failures on VMREAD occur if and
only if there is a KVM bug or hardware error, i.e. are very rare events.

Because a false positive is theoretically possible, e.g. if KVM executed
VMREAD with -EFAULT as the target field, WARN and log a suspected fault
but don't call kvm_spurious_fault(). If VMREAD really did fault, either
KVM is all but guaranteed to hit a fault on another VMX instruction due
to VMX being disabled/broken, or the fault was specific to _that_ VMREAD,
e.g. due to a bad memory operand, in which case killing KVM isn't
necessary since the faulting VMREAD will "return" '0'.

Cc: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---
arch/x86/kvm/vmx/vmenter.S | 13 +++++--------
arch/x86/kvm/vmx/vmx.c | 13 ++++++++++---
arch/x86/kvm/vmx/vmx_ops.h | 30 ++++++++++++------------------
3 files changed, 27 insertions(+), 29 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 3a6461694fc2..f0f5487de077 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -240,8 +240,7 @@ SYM_FUNC_END(__vmx_vcpu_run)

/**
* vmread_error_trampoline - Trampoline from inline asm to vmread_error()
- * @field: VMCS field encoding that failed
- * @fault: %true if the VMREAD faulted, %false if it failed
+ * @field: VMCS field encoding that failed, -EFAULT on fault

* Save and restore volatile registers across a call to vmread_error(). Note,
* all parameters are passed on the stack.
@@ -262,23 +261,21 @@ SYM_FUNC_START(vmread_error_trampoline)
push %r11
#endif
#ifdef CONFIG_X86_64
- /* Load @field and @fault to arg1 and arg2 respectively. */
- mov 3*WORD_SIZE(%rbp), %_ASM_ARG2
+ /* Load @field to arg1. */
mov 2*WORD_SIZE(%rbp), %_ASM_ARG1
#else
/* Parameters are passed on the stack for 32-bit (see asmlinkage). */
- push 3*WORD_SIZE(%ebp)
push 2*WORD_SIZE(%ebp)
#endif

call vmread_error

#ifndef CONFIG_X86_64
- add $8, %esp
+ add $4, %esp
#endif

- /* Zero out @fault, which will be popped into the result register. */
- _ASM_MOV $0, 3*WORD_SIZE(%_ASM_BP)
+ /* Zero out @field, which will be popped into the result register. */
+ _ASM_MOV $0, 2*WORD_SIZE(%_ASM_BP)

#ifdef CONFIG_X86_64
pop %r11
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 9a979279a37b..ddce443baa9c 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -373,10 +373,17 @@ do { \
pr_warn_ratelimited(fmt); \
} while (0)

-asmlinkage void vmread_error(unsigned long field, bool fault)
+asmlinkage void vmread_error(unsigned long field)
{
- if (fault)
- kvm_spurious_fault();
+ /*
+ * @field is stuffed with -EFAULT if VMREAD faulted. There is a teeny
+ * tiny chance of a false positive, i.e. KVM could attempt to VMREAD
+ * -EFAULT if things go sideways, so just log the fault instead of
+ * doing kvm_spurious_fault(). If VMREAD did fault, KVM is all but
+ * guaranteed to die on a VMWRITE, VMRESUME, etc... fault soon enough.
+ */
+ if (field == (unsigned long)-EFAULT)
+ vmx_insn_failed("kvm: vmread faulted\n");
else
vmx_insn_failed("kvm: vmread failed: field=%lx\n", field);
}
diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h
index 9e9ef47e988c..5e5113d2b324 100644
--- a/arch/x86/kvm/vmx/vmx_ops.h
+++ b/arch/x86/kvm/vmx/vmx_ops.h
@@ -10,9 +10,8 @@
#include "vmcs.h"
#include "x86.h"

-asmlinkage void vmread_error(unsigned long field, bool fault);
-__attribute__((regparm(0))) void vmread_error_trampoline(unsigned long field,
- bool fault);
+asmlinkage void vmread_error(unsigned long field);
+__attribute__((regparm(0))) void vmread_error_trampoline(unsigned long field);
void vmwrite_error(unsigned long field, unsigned long value);
void vmclear_error(struct vmcs *vmcs, u64 phys_addr);
void vmptrld_error(struct vmcs *vmcs, u64 phys_addr);
@@ -76,29 +75,24 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field)
"ja 3f\n\t"

/*
- * VMREAD failed. Push '0' for @fault, push the failing
- * @field, and bounce through the trampoline to preserve
- * volatile registers.
+ * VMREAD failed, push the failing @field, and bounce
+ * through the trampoline to preserve volatile registers.
+ * If VMREAD faults, this will push -EFAULT (see below).
*/
- "push $0\n\t"
- "push %2\n\t"
- "2:call vmread_error_trampoline\n\t"
+ "2: push %2\n\t"
+ "call vmread_error_trampoline\n\t"

/*
* Unwind the stack. Note, the trampoline zeros out the
- * memory for @fault so that the result is '0' on error.
+ * memory for @field so that the result is '0' on error,
+ * hence the pop to %1, not %2.
*/
- "pop %2\n\t"
"pop %1\n\t"
"3:\n\t"

- /* VMREAD faulted. As above, except push '1' for @fault. */
- ".pushsection .fixup, \"ax\"\n\t"
- "4: push $1\n\t"
- "push %2\n\t"
- "jmp 2b\n\t"
- ".popsection\n\t"
- _ASM_EXTABLE(1b, 4b)
+ /* VMREAD faulted. As above, except stuff -EFAULT into @field. */
+ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %2)
+
: ASM_CALL_CONSTRAINT, "=r"(value) : "r"(field) : "cc");
return value;
}
--
2.34.0.rc0.344.g81b53c2807-goog