Re: [patch 2/2] x86/kvm/vmx: Prevent split lock detection induced #AC wreckage

From: Sean Christopherson
Date: Thu Apr 02 2020 - 11:30:38 EST


On Thu, Apr 02, 2020 at 02:33:00PM +0200, Thomas Gleixner wrote:
> Without at least minimal handling for split lock detection induced #AC, VMX
> will just run into the same problem as the VMWare hypervisor, which was
> reported by Kenneth.
>
> It will inject the #AC blindly into the guest whether the guest is prepared
> or not.
>
> Add the minimal required handling for it:
>
> - Check guest state whether CR0.AM is enabled and EFLAGS.AC is set. If
> so, then the #AC originated from CPL3 and the guest has is prepared to
> handle it. In this case it does not matter whether the #AC is due to a
> split lock or a regular unaligned check.
>
> - Invoke a minimal split lock detection handler. If the host SLD mode is
> sld_warn, then handle it in the same way as user space handling works:
> Emit a warning, disable SLD and mark the current task with TIF_SLD.
> With that resume the guest without injecting #AC.
>
> If the host mode is sld_fatal or sld_off, emit a warning and deliver
> the exception to user space which can crash and burn itself.
>
> Mark the module with MOD_INFO(sld_safe, "Y") so the module loader does not
> force SLD off.

Some comments below. But, any objection to taking Xiaoyao's patches that
do effectively the same things, minus the MOD_INFO()? I'll repost them in
reply to this thread.

> Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> Cc: "Kenneth R. Crudup" <kenny@xxxxxxxxx>
> Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> Cc: Fenghua Yu <fenghua.yu@xxxxxxxxx>
> Cc: Xiaoyao Li <xiaoyao.li@xxxxxxxxx>
> Cc: Nadav Amit <namit@xxxxxxxxxx>
> Cc: Thomas Hellstrom <thellstrom@xxxxxxxxxx>
> Cc: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
> Cc: Tony Luck <tony.luck@xxxxxxxxx>
> ---
> arch/x86/include/asm/cpu.h | 1 +
> arch/x86/kernel/cpu/intel.c | 28 +++++++++++++++++++++++-----
> arch/x86/kvm/vmx/vmx.c | 40 +++++++++++++++++++++++++++++++++++++---
> 3 files changed, 61 insertions(+), 8 deletions(-)
>
> --- a/arch/x86/include/asm/cpu.h
> +++ b/arch/x86/include/asm/cpu.h
> @@ -44,6 +44,7 @@ unsigned int x86_stepping(unsigned int s
> extern void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c);
> extern void switch_to_sld(unsigned long tifn);
> extern bool handle_user_split_lock(struct pt_regs *regs, long error_code);
> +extern bool handle_guest_split_lock(unsigned long ip);
> extern void split_lock_validate_module_text(struct module *me, void *text, void *text_end);
> #else
> static inline void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c) {}
> --- a/arch/x86/kernel/cpu/intel.c
> +++ b/arch/x86/kernel/cpu/intel.c
> @@ -1102,13 +1102,10 @@ static void split_lock_init(void)
> split_lock_verify_msr(sld_state != sld_off);
> }
>
> -bool handle_user_split_lock(struct pt_regs *regs, long error_code)
> +static void split_lock_warn(unsigned long ip)
> {
> - if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal)
> - return false;
> -
> pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
> - current->comm, current->pid, regs->ip);
> + current->comm, current->pid, ip);
>
> /*
> * Disable the split lock detection for this task so it can make
> @@ -1117,6 +1114,27 @@ bool handle_user_split_lock(struct pt_re
> */
> sld_update_msr(false);
> set_tsk_thread_flag(current, TIF_SLD);
> +}
> +
> +bool handle_guest_split_lock(unsigned long ip)
> +{
> + if (sld_state == sld_warn) {
> + split_lock_warn(ip);
> + return true;
> + }
> +
> + pr_warn_once("#AC: %s/%d %s split_lock trap at address: 0x%lx\n",
> + current->comm, current->pid,
> + sld_state == sld_fatal ? "fatal" : "bogus", ip);
> + return false;
> +}
> +EXPORT_SYMBOL_GPL(handle_guest_split_lock);
> +
> +bool handle_user_split_lock(struct pt_regs *regs, long error_code)
> +{
> + if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal)
> + return false;
> + split_lock_warn(regs->ip);
> return true;
> }
>
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -65,6 +65,7 @@
>
> MODULE_AUTHOR("Qumranet");
> MODULE_LICENSE("GPL");
> +MODULE_INFO(sld_safe, "Y");
>
> #ifdef MODULE
> static const struct x86_cpu_id vmx_cpu_id[] = {
> @@ -4623,6 +4624,22 @@ static int handle_machine_check(struct k
> return 1;
> }
>
> +static bool guest_handles_ac(struct kvm_vcpu *vcpu)
> +{
> + /*
> + * If guest has alignment checking enabled in CR0 and activated in
> + * eflags, then the #AC originated from CPL3 and the guest is able
> + * to handle it. It does not matter whether this is a regular or
> + * a split lock operation induced #AC.
> + */
> + if (vcpu->arch.cr0 & X86_CR0_AM &&

Technically not required since KVM doesn't let the gets toggle CR0.AM at
will, but going through kvm_read_cr0{_bits}() is preferred.

> + vmx_get_rflags(vcpu) & X86_EFLAGS_AC)

I don't think this is correct. A guest could trigger a split-lock #AC at
CPL0 with EFLAGS.AC=1 and CR0.AM=1, and then panic because it didn't expect
#AC at CPL0.

> + return true;
> +
> + /* Add guest SLD handling checks here once it's supported */
> + return false;
> +}
> +
> static int handle_exception_nmi(struct kvm_vcpu *vcpu)
> {
> struct vcpu_vmx *vmx = to_vmx(vcpu);
> @@ -4688,9 +4705,6 @@ static int handle_exception_nmi(struct k
> return handle_rmode_exception(vcpu, ex_no, error_code);
>
> switch (ex_no) {
> - case AC_VECTOR:
> - kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
> - return 1;
> case DB_VECTOR:
> dr6 = vmcs_readl(EXIT_QUALIFICATION);
> if (!(vcpu->guest_debug &
> @@ -4719,6 +4733,26 @@ static int handle_exception_nmi(struct k
> kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
> kvm_run->debug.arch.exception = ex_no;
> break;
> + case AC_VECTOR:
> + if (guest_handles_ac(vcpu)) {
> + kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
> + return 1;
> + }
> + /*
> + * Handle #AC caused by split lock detection. If the host
> + * mode is sld_warn, then it warns, marks current with
> + * TIF_SLD and disables split lock detection. So the guest
> + * can just continue.
> + *
> + * If the host mode is fatal, the handling code warned. Let
> + * qemu kill itself.
> + *
> + * If the host mode is off, then this #AC is bonkers and
> + * something is badly wrong. Let it fail as well.
> + */
> + if (handle_guest_split_lock(kvm_rip_read(vcpu)))
> + return 1;
> + /* fall through */
> default:
> kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
> kvm_run->ex.exception = ex_no;
>