Re: [PATCH v3 2/4] KVM: X86: Fix loss of exception which has not yet injected

From: Wanpeng Li
Date: Thu Aug 24 2017 - 02:52:25 EST


2017-08-24 12:21 GMT+08:00 Wanpeng Li <kernellwp@xxxxxxxxx>:
> From: Wanpeng Li <wanpeng.li@xxxxxxxxxxx>
>
> vmx_complete_interrupts() assumes that the exception is always injected,
> so it would be dropped by kvm_clear_exception_queue(). This patch separates
> exception.pending from exception.injected, exception.inject represents the
> exception is injected or the exception should be reinjected due to vmexit
> occurs during event delivery in VMX non-root operation. exception.pending
> represents the exception is queued and will be cleared when injecting the
> exception to the guest. So exception.pending and exception.injected can
> cooperate to guarantee exception will not be lost.
>
> Reported-by: Radim KrÄmÃÅ <rkrcmar@xxxxxxxxxx>
> Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> Cc: Radim KrÄmÃÅ <rkrcmar@xxxxxxxxxx>
> Signed-off-by: Wanpeng Li <wanpeng.li@xxxxxxxxxxx>
> ---
> v2 -> v3:
> * the changes to inject_pending_event and adds the WARN_ON
> * combine injected and pending exception for GET/SET_VCPU_EVENTS
>
> arch/x86/include/asm/kvm_host.h | 2 +-
> arch/x86/kvm/svm.c | 2 +-
> arch/x86/kvm/vmx.c | 4 +--
> arch/x86/kvm/x86.c | 56 ++++++++++++++++++++++++++---------------
> arch/x86/kvm/x86.h | 4 +--
> 5 files changed, 42 insertions(+), 26 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 9d90787..6e385ac 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -547,8 +547,8 @@ struct kvm_vcpu_arch {
>
> struct kvm_queued_exception {
> bool pending;
> + bool injected;
> bool has_error_code;
> - bool reinject;
> u8 nr;
> u32 error_code;
> u8 nested_apf;
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index a2fddce..6a439a1 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -655,7 +655,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu)
> struct vcpu_svm *svm = to_svm(vcpu);
> unsigned nr = vcpu->arch.exception.nr;
> bool has_error_code = vcpu->arch.exception.has_error_code;
> - bool reinject = vcpu->arch.exception.reinject;
> + bool reinject = vcpu->arch.exception.injected;
> u32 error_code = vcpu->arch.exception.error_code;
>
> /*
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index c5f43ab..902b780 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -2503,7 +2503,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
> struct vcpu_vmx *vmx = to_vmx(vcpu);
> unsigned nr = vcpu->arch.exception.nr;
> bool has_error_code = vcpu->arch.exception.has_error_code;
> - bool reinject = vcpu->arch.exception.reinject;
> + bool reinject = vcpu->arch.exception.injected;
> u32 error_code = vcpu->arch.exception.error_code;
> u32 intr_info = nr | INTR_INFO_VALID_MASK;
>
> @@ -10948,7 +10948,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
> u32 idt_vectoring;
> unsigned int nr;
>
> - if (vcpu->arch.exception.pending && vcpu->arch.exception.reinject) {
> + if (vcpu->arch.exception.injected) {
> nr = vcpu->arch.exception.nr;
> idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 8f41b88..b698b2f 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -389,15 +389,18 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
>
> kvm_make_request(KVM_REQ_EVENT, vcpu);
>
> - if (!vcpu->arch.exception.pending) {
> + if (!vcpu->arch.exception.pending ||
> + !vcpu->arch.exception.injected) {
> queue:
> if (has_error && !is_protmode(vcpu))
> has_error = false;
> - vcpu->arch.exception.pending = true;
> + if (reinject)
> + vcpu->arch.exception.injected = true;
> + else
> + vcpu->arch.exception.pending = true;
> vcpu->arch.exception.has_error_code = has_error;
> vcpu->arch.exception.nr = nr;
> vcpu->arch.exception.error_code = error_code;
> - vcpu->arch.exception.reinject = reinject;
> return;
> }
>
> @@ -3069,8 +3072,14 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
> struct kvm_vcpu_events *events)
> {
> process_nmi(vcpu);
> + /*
> + * FIXME: pass injected and pending separately. This is only
> + * needed for nested virtualization, whose state cannot be
> + * migrated yet. For now we combine them just in case.
> + */
> events->exception.injected =
> - vcpu->arch.exception.pending &&
> + (vcpu->arch.exception.pending ||
> + vcpu->arch.exception.injected) &&
> !kvm_exception_is_soft(vcpu->arch.exception.nr);
> events->exception.nr = vcpu->arch.exception.nr;
> events->exception.has_error_code = vcpu->arch.exception.has_error_code;
> @@ -3125,6 +3134,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
> return -EINVAL;
>
> process_nmi(vcpu);
> + vcpu->arch.exception.injected = false;
> vcpu->arch.exception.pending = events->exception.injected;
> vcpu->arch.exception.nr = events->exception.nr;
> vcpu->arch.exception.has_error_code = events->exception.has_error_code;
> @@ -6350,21 +6360,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
> int r;
>
> /* try to reinject previous events if any */
> - if (vcpu->arch.exception.pending) {
> - trace_kvm_inj_exception(vcpu->arch.exception.nr,
> - vcpu->arch.exception.has_error_code,
> - vcpu->arch.exception.error_code);
> -
> - if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
> - __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
> - X86_EFLAGS_RF);
> -
> - if (vcpu->arch.exception.nr == DB_VECTOR &&
> - (vcpu->arch.dr7 & DR7_GD)) {
> - vcpu->arch.dr7 &= ~DR7_GD;
> - kvm_update_dr7(vcpu);
> - }
> -
> + if (vcpu->arch.exception.injected) {
> kvm_x86_ops->queue_exception(vcpu);
> return 0;
> }
> @@ -6386,7 +6382,25 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
> }
>
> /* try to inject new event if pending */
> - if (vcpu->arch.smi_pending && !is_smm(vcpu)) {
> + if (vcpu->arch.exception.pending) {
> + trace_kvm_inj_exception(vcpu->arch.exception.nr,
> + vcpu->arch.exception.has_error_code,
> + vcpu->arch.exception.error_code);
> +
> + vcpu->arch.exception.pending = false;
> + vcpu->arch.exception.injected = true;
> + kvm_x86_ops->queue_exception(vcpu);
> +
> + if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
> + __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
> + X86_EFLAGS_RF);
> +
> + if (vcpu->arch.exception.nr == DB_VECTOR &&
> + (vcpu->arch.dr7 & DR7_GD)) {
> + vcpu->arch.dr7 &= ~DR7_GD;
> + kvm_update_dr7(vcpu);
> + }
> + } else if (vcpu->arch.smi_pending && !is_smm(vcpu)) {
> vcpu->arch.smi_pending = false;
> enter_smm(vcpu);
> } else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
> @@ -6862,6 +6876,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> kvm_x86_ops->enable_nmi_window(vcpu);
> if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
> kvm_x86_ops->enable_irq_window(vcpu);
> + WARN_ON(vcpu->arch.exception.pending);

This WARN_ON() is suggested during the review of last version,
however, there are many cases in inject_pending_event() can result in
return directly w/ vcpu->arch.exception.pending is true. Actually I
have already catched the warning several times during the testing. I
think we should remove it when committing.

Regards,
Wanpeng Li

> }
>
> if (kvm_lapic_enabled(vcpu)) {
> @@ -7738,6 +7753,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
> vcpu->arch.nmi_injected = false;
> kvm_clear_interrupt_queue(vcpu);
> kvm_clear_exception_queue(vcpu);
> + vcpu->arch.exception.pending = false;
>
> memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
> kvm_update_dr0123(vcpu);
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 1134603..e6ec0de 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -11,7 +11,7 @@
>
> static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
> {
> - vcpu->arch.exception.pending = false;
> + vcpu->arch.exception.injected = false;
> }
>
> static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector,
> @@ -29,7 +29,7 @@ static inline void kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu)
>
> static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu)
> {
> - return vcpu->arch.exception.pending || vcpu->arch.interrupt.pending ||
> + return vcpu->arch.exception.injected || vcpu->arch.interrupt.pending ||
> vcpu->arch.nmi_injected;
> }
>
> --
> 2.7.4
>