Re: [PATCH v2] kvm: nVMX: fix entry with pending interrupt if APICv is enabled

From: Nikita Leshchenko
Date: Thu Oct 04 2018 - 05:29:08 EST


On Wed, 2018-10-03 at 17:26 +0200, Paolo Bonzini wrote:
> Commit b5861e5cf2fcf83031ea3e26b0a69d887adf7d21 introduced a check on
> the interrupt-window and NMI-window CPU execution controls in order to
> inject an external interrupt vmexit before the first guest instruction
> executes. However, when APIC virtualization is enabled the host does not
> need a vmexit in order to inject an interrupt at the next interrupt window;
> instead, it just places the interrupt vector in RVI and the processor will
> inject it as soon as possible. Therefore, on machines with APICv it is
> not enough to check the CPU execution controls: the same scenario can also
> happen if RVI>0.
>
> Fixes: b5861e5cf2fcf83031ea3e26b0a69d887adf7d21
> Cc: Nikita Leshchenko <nikita.leshchenko@xxxxxxxxxx>
> Cc: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
> Cc: Liran Alon <liran.alon@xxxxxxxxxx>
> Cc: Radim Krčmář <rkrcmar@xxxxxxxxxx>
> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>

Reviewed-by: Nikita Leshchenko <nikita.leshchenko@xxxxxxxxxx>

> ---
> arch/x86/kvm/vmx.c | 38 ++++++++++++++++++++++++++------------
> 1 file changed, 26 insertions(+), 12 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 6ef2d5b139b9..c7ae8ea87bc4 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -6162,6 +6162,11 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
> nested_mark_vmcs12_pages_dirty(vcpu);
> }
>
> +static u8 vmx_get_rvi(void)
> +{
> + return vmcs_read16(GUEST_INTR_STATUS) & 0xff;
> +}
> +
> static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
> {
> struct vcpu_vmx *vmx = to_vmx(vcpu);
> @@ -6174,7 +6179,7 @@ static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
> WARN_ON_ONCE(!vmx->nested.virtual_apic_page))
> return false;
>
> - rvi = vmcs_read16(GUEST_INTR_STATUS) & 0xff;
> + rvi = vmx_get_rvi();
>
> vapic_page = kmap(vmx->nested.virtual_apic_page);
> vppr = *((u32 *)(vapic_page + APIC_PROCPRI));
> @@ -10349,6 +10354,14 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
> return max_irr;
> }
>
> +static u8 vmx_has_apicv_interrupt(struct kvm_vcpu *vcpu)
> +{
> + u8 rvi = vmx_get_rvi();
> + u8 vppr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_PROCPRI);
> +
> + return ((rvi & 0xf0) > (vppr & 0xf0));
> +}
> +
> static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
> {
> if (!kvm_vcpu_apicv_active(vcpu))
> @@ -12593,10 +12606,13 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
> struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
> bool from_vmentry = !!exit_qual;
> u32 dummy_exit_qual;
> - u32 vmcs01_cpu_exec_ctrl;
> + bool evaluate_pending_interrupts;
> int r = 0;
>
> - vmcs01_cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
> + evaluate_pending_interrupts = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) &
> + (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING);
> + if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu))
> + evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu);
>
> enter_guest_mode(vcpu);
>
> @@ -12644,16 +12660,14 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
> * to L1 or delivered directly to L2 (e.g. In case L1 don't
> * intercept EXTERNAL_INTERRUPT).
> *
> - * Usually this would be handled by L0 requesting a
> - * IRQ/NMI window by setting VMCS accordingly. However,
> - * this setting was done on VMCS01 and now VMCS02 is active
> - * instead. Thus, we force L0 to perform pending event
> - * evaluation by requesting a KVM_REQ_EVENT.
> - */
> - if (vmcs01_cpu_exec_ctrl &
> - (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING)) {
> + * Usually this would be handled by the processor noticing an
> + * IRQ/NMI window request, or checking RVI during evaluation of
> + * pending virtual interrupts. However, this setting was done
> + * on VMCS01 and now VMCS02 is active instead. Thus, we force L0
> + * to perform pending event evaluation by requesting a KVM_REQ_EVENT.
> + */
> + if (unlikely(evaluate_pending_interrupts))
> kvm_make_request(KVM_REQ_EVENT, vcpu);
> - }
>
> /*
> * Note no nested_vmx_succeed or nested_vmx_fail here. At this point