[PATCH v4 2/4] KVM: X86: Fix loss of exception which has not yet injected

From: Wanpeng Li
Date: Thu Aug 24 2017 - 06:35:28 EST


From: Wanpeng Li <wanpeng.li@xxxxxxxxxxx>

vmx_complete_interrupts() assumes that the exception is always injected,
so it would be dropped by kvm_clear_exception_queue(). This patch separates
exception.pending from exception.injected, exception.inject represents the
exception is injected or the exception should be reinjected due to vmexit
occurs during event delivery in VMX non-root operation. exception.pending
represents the exception is queued and will be cleared when injecting the
exception to the guest. So exception.pending and exception.injected can
cooperate to guarantee exception will not be lost.

Reported-by: Radim KrÄmÃÅ <rkrcmar@xxxxxxxxxx>
Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
Cc: Radim KrÄmÃÅ <rkrcmar@xxxxxxxxxx>
Signed-off-by: Wanpeng Li <wanpeng.li@xxxxxxxxxxx>
---
v3 -> v4:
* avoid the double fault
* exception must be injected immediately
v2 -> v3:
* the changes to inject_pending_event and adds the WARN_ON
* combine injected and pending exception for GET/SET_VCPU_EVENTS

arch/x86/include/asm/kvm_host.h | 2 +-
arch/x86/kvm/svm.c | 2 +-
arch/x86/kvm/vmx.c | 4 +--
arch/x86/kvm/x86.c | 78 +++++++++++++++++++++++++++--------------
arch/x86/kvm/x86.h | 4 +--
5 files changed, 57 insertions(+), 33 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9d90787..6e385ac 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -547,8 +547,8 @@ struct kvm_vcpu_arch {

struct kvm_queued_exception {
bool pending;
+ bool injected;
bool has_error_code;
- bool reinject;
u8 nr;
u32 error_code;
u8 nested_apf;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a2fddce..6a439a1 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -655,7 +655,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
unsigned nr = vcpu->arch.exception.nr;
bool has_error_code = vcpu->arch.exception.has_error_code;
- bool reinject = vcpu->arch.exception.reinject;
+ bool reinject = vcpu->arch.exception.injected;
u32 error_code = vcpu->arch.exception.error_code;

/*
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c5f43ab..902b780 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2503,7 +2503,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned nr = vcpu->arch.exception.nr;
bool has_error_code = vcpu->arch.exception.has_error_code;
- bool reinject = vcpu->arch.exception.reinject;
+ bool reinject = vcpu->arch.exception.injected;
u32 error_code = vcpu->arch.exception.error_code;
u32 intr_info = nr | INTR_INFO_VALID_MASK;

@@ -10948,7 +10948,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
u32 idt_vectoring;
unsigned int nr;

- if (vcpu->arch.exception.pending && vcpu->arch.exception.reinject) {
+ if (vcpu->arch.exception.injected) {
nr = vcpu->arch.exception.nr;
idt_vectoring = nr | VECTORING_INFO_VALID_MASK;

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8f41b88..fa79cd7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -389,15 +389,20 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,

kvm_make_request(KVM_REQ_EVENT, vcpu);

- if (!vcpu->arch.exception.pending) {
+ if (!vcpu->arch.exception.pending ||
+ !vcpu->arch.exception.injected) {
queue:
if (has_error && !is_protmode(vcpu))
has_error = false;
- vcpu->arch.exception.pending = true;
+ if (reinject)
+ vcpu->arch.exception.injected = true;
+ else {
+ vcpu->arch.exception.pending = true;
+ vcpu->arch.exception.injected = false;
+ }
vcpu->arch.exception.has_error_code = has_error;
vcpu->arch.exception.nr = nr;
vcpu->arch.exception.error_code = error_code;
- vcpu->arch.exception.reinject = reinject;
return;
}

@@ -3069,8 +3074,14 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events)
{
process_nmi(vcpu);
+ /*
+ * FIXME: pass injected and pending separately. This is only
+ * needed for nested virtualization, whose state cannot be
+ * migrated yet. For now we combine them just in case.
+ */
events->exception.injected =
- vcpu->arch.exception.pending &&
+ (vcpu->arch.exception.pending ||
+ vcpu->arch.exception.injected) &&
!kvm_exception_is_soft(vcpu->arch.exception.nr);
events->exception.nr = vcpu->arch.exception.nr;
events->exception.has_error_code = vcpu->arch.exception.has_error_code;
@@ -3125,6 +3136,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
return -EINVAL;

process_nmi(vcpu);
+ vcpu->arch.exception.injected = false;
vcpu->arch.exception.pending = events->exception.injected;
vcpu->arch.exception.nr = events->exception.nr;
vcpu->arch.exception.has_error_code = events->exception.has_error_code;
@@ -6350,33 +6362,25 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
int r;

/* try to reinject previous events if any */
- if (vcpu->arch.exception.pending) {
- trace_kvm_inj_exception(vcpu->arch.exception.nr,
- vcpu->arch.exception.has_error_code,
- vcpu->arch.exception.error_code);
-
- if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
- __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
- X86_EFLAGS_RF);
-
- if (vcpu->arch.exception.nr == DB_VECTOR &&
- (vcpu->arch.dr7 & DR7_GD)) {
- vcpu->arch.dr7 &= ~DR7_GD;
- kvm_update_dr7(vcpu);
- }
-
+ if (vcpu->arch.exception.injected) {
kvm_x86_ops->queue_exception(vcpu);
return 0;
}

- if (vcpu->arch.nmi_injected) {
- kvm_x86_ops->set_nmi(vcpu);
- return 0;
- }
+ /*
+ * Exceptions must be injected immediately, or the exception
+ * frame will have the address of the NMI or interrupt handler.
+ */
+ if (!vcpu->arch.exception.pending) {
+ if (vcpu->arch.nmi_injected) {
+ kvm_x86_ops->set_nmi(vcpu);
+ return 0;
+ }

- if (vcpu->arch.interrupt.pending) {
- kvm_x86_ops->set_irq(vcpu);
- return 0;
+ if (vcpu->arch.interrupt.pending) {
+ kvm_x86_ops->set_irq(vcpu);
+ return 0;
+ }
}

if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
@@ -6386,7 +6390,25 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
}

/* try to inject new event if pending */
- if (vcpu->arch.smi_pending && !is_smm(vcpu)) {
+ if (vcpu->arch.exception.pending) {
+ trace_kvm_inj_exception(vcpu->arch.exception.nr,
+ vcpu->arch.exception.has_error_code,
+ vcpu->arch.exception.error_code);
+
+ vcpu->arch.exception.pending = false;
+ vcpu->arch.exception.injected = true;
+ kvm_x86_ops->queue_exception(vcpu);
+
+ if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
+ __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
+ X86_EFLAGS_RF);
+
+ if (vcpu->arch.exception.nr == DB_VECTOR &&
+ (vcpu->arch.dr7 & DR7_GD)) {
+ vcpu->arch.dr7 &= ~DR7_GD;
+ kvm_update_dr7(vcpu);
+ }
+ } else if (vcpu->arch.smi_pending && !is_smm(vcpu)) {
vcpu->arch.smi_pending = false;
enter_smm(vcpu);
} else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
@@ -6862,6 +6884,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_x86_ops->enable_nmi_window(vcpu);
if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
kvm_x86_ops->enable_irq_window(vcpu);
+ WARN_ON(vcpu->arch.exception.pending);
}

if (kvm_lapic_enabled(vcpu)) {
@@ -7738,6 +7761,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vcpu->arch.nmi_injected = false;
kvm_clear_interrupt_queue(vcpu);
kvm_clear_exception_queue(vcpu);
+ vcpu->arch.exception.pending = false;

memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
kvm_update_dr0123(vcpu);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 1134603..e6ec0de 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -11,7 +11,7 @@

static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
{
- vcpu->arch.exception.pending = false;
+ vcpu->arch.exception.injected = false;
}

static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector,
@@ -29,7 +29,7 @@ static inline void kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu)

static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.exception.pending || vcpu->arch.interrupt.pending ||
+ return vcpu->arch.exception.injected || vcpu->arch.interrupt.pending ||
vcpu->arch.nmi_injected;
}

--
2.7.4