[PATCH 3.16 7/7] KVM: x86: fix singlestepping over syscall

From: Ben Hutchings
Date: Tue Jun 27 2017 - 07:13:13 EST


3.16.45-rc1 review patch. If anyone has any objections, please let me know.

------------------

From: Paolo Bonzini <pbonzini@xxxxxxxxxx>

commit c8401dda2f0a00cd25c0af6a95ed50e478d25de4 upstream.

TF is handled a bit differently for syscall and sysret, compared
to the other instructions: TF is checked after the instruction completes,
so that the OS can disable #DB at a syscall by adding TF to FMASK.
When the sysret is executed the #DB is taken "as if" the syscall insn
just completed.

KVM emulates syscall so that it can trap 32-bit syscall on Intel processors.
Fix the behavior, otherwise you could get #DB on a user stack which is not
nice. This does not affect Linux guests, as they use an IST or task gate
for #DB.

This fixes CVE-2017-7518.

Reported-by: Andy Lutomirski <luto@xxxxxxxxxx>
Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
Signed-off-by: Radim KrÄmÃÅ <rkrcmar@xxxxxxxxxx>
[bwh: Backported to 3.16:
- kvm_vcpu_check_singlestep() did not take an rflags parameter but
called get_rflags() itself; delete that code
- kvm_vcpu_check_singlestep() sets some flags differently
- Drop changes to kvm_skip_emulated_instruction()]
Signed-off-by: Ben Hutchings <ben@xxxxxxxxxxxxxxx>
---
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -274,6 +274,7 @@ struct x86_emulate_ctxt {
bool guest_mode; /* guest running a nested guest */
bool perm_ok; /* do not check permissions if true */
bool ud; /* inject an #UD if host doesn't support insn */
+ bool tf; /* TF value before instruction (after for syscall/sysret) */

bool have_exception;
struct x86_exception exception;
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2310,6 +2310,7 @@ static int em_syscall(struct x86_emulate
ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
}

+ ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
return X86EMUL_CONTINUE;
}

--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4942,6 +4942,8 @@ static void init_emulate_ctxt(struct kvm
kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);

ctxt->eflags = kvm_get_rflags(vcpu);
+ ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
+
ctxt->eip = kvm_rip_read(vcpu);
ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
(ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 :
@@ -5132,38 +5134,26 @@ static int kvm_vcpu_check_hw_bp(unsigned
return dr6;
}

-static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, int *r)
+static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
{
struct kvm_run *kvm_run = vcpu->run;

- /*
- * Use the "raw" value to see if TF was passed to the processor.
- * Note that the new value of the flags has not been saved yet.
- *
- * This is correct even for TF set by the guest, because "the
- * processor will not generate this exception after the instruction
- * that sets the TF flag".
- */
- unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
-
- if (unlikely(rflags & X86_EFLAGS_TF)) {
- if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
- kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1;
- kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
- kvm_run->debug.arch.exception = DB_VECTOR;
- kvm_run->exit_reason = KVM_EXIT_DEBUG;
- *r = EMULATE_USER_EXIT;
- } else {
- vcpu->arch.emulate_ctxt.eflags &= ~X86_EFLAGS_TF;
- /*
- * "Certain debug exceptions may clear bit 0-3. The
- * remaining contents of the DR6 register are never
- * cleared by the processor".
- */
- vcpu->arch.dr6 &= ~15;
- vcpu->arch.dr6 |= DR6_BS;
- kvm_queue_exception(vcpu, DB_VECTOR);
- }
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+ kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1;
+ kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
+ kvm_run->debug.arch.exception = DB_VECTOR;
+ kvm_run->exit_reason = KVM_EXIT_DEBUG;
+ *r = EMULATE_USER_EXIT;
+ } else {
+ vcpu->arch.emulate_ctxt.eflags &= ~X86_EFLAGS_TF;
+ /*
+ * "Certain debug exceptions may clear bit 0-3. The
+ * remaining contents of the DR6 register are never
+ * cleared by the processor".
+ */
+ vcpu->arch.dr6 &= ~15;
+ vcpu->arch.dr6 |= DR6_BS;
+ kvm_queue_exception(vcpu, DB_VECTOR);
}
}

@@ -5316,8 +5306,9 @@ restart:
kvm_make_request(KVM_REQ_EVENT, vcpu);
vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
kvm_rip_write(vcpu, ctxt->eip);
- if (r == EMULATE_DONE)
- kvm_vcpu_check_singlestep(vcpu, &r);
+ if (r == EMULATE_DONE &&
+ (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
+ kvm_vcpu_do_singlestep(vcpu, &r);
kvm_set_rflags(vcpu, ctxt->eflags);
} else
vcpu->arch.emulate_regs_need_sync_to_vcpu = true;