Re: [PATCH] KVM: x86: SVM: fix nested PAUSE filtering when L0 intercepts PAUSE

From: Maxim Levitsky
Date: Tue Jun 07 2022 - 04:28:09 EST


On Tue, 2022-05-31 at 13:58 -0400, Paolo Bonzini wrote:
> Commit 74fd41ed16fd ("KVM: x86: nSVM: support PAUSE filtering when L0
> doesn't intercept PAUSE") introduced passthrough support for nested
> pause
> filtering, (when the host doesn't intercept PAUSE) (either disabled
> with
> kvm module param, or disabled with '-overcommit cpu-pm=on')
>
> Before this commit, L1 KVM didn't intercept PAUSE at all; afterwards,
> the feature was exposed as supported by KVM cpuid unconditionally,
> thus
> if L1 could try to use it even when the L0 KVM can't really support
> it.
>
> In this case the fallback caused KVM to intercept each PAUSE
> instruction;
> in some cases, such intercept can slow down the nested guest so much
> that it can fail to boot.  Instead, before the problematic commit KVM
> was already setting both thresholds to 0 in vmcb02, but after the
> first
> userspace VM exit shrink_ple_window was called and would reset the
> pause_filter_count to the default value.
>
> To fix this, change the fallback strategy - ignore the guest
> threshold
> values, but use/update the host threshold values unless the guest
> specifically requests disabling PAUSE filtering (either simple or
> advanced).
>
> Also fix a minor bug: on nested VM exit, when PAUSE filter counter
> were copied back to vmcb01, a dirty bit was not set.
>
> Thanks a lot to Suravee Suthikulpanit for debugging this!
>
> Fixes: 74fd41ed16fd ("KVM: x86: nSVM: support PAUSE filtering when L0
> doesn't intercept PAUSE")
> Reported-by: Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx>
> Tested-by: Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx>
> Co-developed-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx>
> Message-Id: <20220518072709.730031-1-mlevitsk@xxxxxxxxxx>
> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> ---
>  arch/x86/kvm/svm/nested.c | 39 +++++++++++++++++++++----------------
> --
>  arch/x86/kvm/svm/svm.c    |  4 ++--
>  2 files changed, 23 insertions(+), 20 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index 6d0233a2469e..88da8edbe1e1 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -642,6 +642,8 @@ static void nested_vmcb02_prepare_control(struct
> vcpu_svm *svm,
>         struct kvm_vcpu *vcpu = &svm->vcpu;
>         struct vmcb *vmcb01 = svm->vmcb01.ptr;
>         struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
> +       u32 pause_count12;
> +       u32 pause_thresh12;
>  
>         /*
>          * Filled at exit: exit_code, exit_code_hi, exit_info_1,
> exit_info_2,
> @@ -721,27 +723,25 @@ static void
> nested_vmcb02_prepare_control(struct vcpu_svm *svm,
>         if (!nested_vmcb_needs_vls_intercept(svm))
>                 vmcb02->control.virt_ext |=
> VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
>  
> +       pause_count12 = svm->pause_filter_enabled ? svm-
> >nested.ctl.pause_filter_count : 0;
> +       pause_thresh12 = svm->pause_threshold_enabled ? svm-
> >nested.ctl.pause_filter_thresh : 0;
>         if (kvm_pause_in_guest(svm->vcpu.kvm)) {
> -               /* use guest values since host doesn't use them */
> -               vmcb02->control.pause_filter_count =
> -                               svm->pause_filter_enabled ?
> -                               svm->nested.ctl.pause_filter_count :
> 0;
> +               /* use guest values since host doesn't intercept
> PAUSE */
> +               vmcb02->control.pause_filter_count = pause_count12;
> +               vmcb02->control.pause_filter_thresh = pause_thresh12;
>  
> -               vmcb02->control.pause_filter_thresh =
> -                               svm->pause_threshold_enabled ?
> -                               svm->nested.ctl.pause_filter_thresh :
> 0;
> -
> -       } else if (!vmcb12_is_intercept(&svm->nested.ctl,
> INTERCEPT_PAUSE)) {
> -               /* use host values when guest doesn't use them */
> +       } else {
> +               /* start from host values otherwise */
>                 vmcb02->control.pause_filter_count = vmcb01-
> >control.pause_filter_count;
>                 vmcb02->control.pause_filter_thresh = vmcb01-
> >control.pause_filter_thresh;
> -       } else {
> -               /*
> -                * Intercept every PAUSE otherwise and
> -                * ignore both host and guest values
> -                */
> -               vmcb02->control.pause_filter_count = 0;
> -               vmcb02->control.pause_filter_thresh = 0;
> +
> +               /* ... but ensure filtering is disabled if so
> requested.  */
> +               if (vmcb12_is_intercept(&svm->nested.ctl,
> INTERCEPT_PAUSE)) {
> +                       if (!pause_count12)
> +                               vmcb02->control.pause_filter_count =
> 0;
> +                       if (!pause_thresh12)
> +                               vmcb02->control.pause_filter_thresh =
> 0;
> +               }
>         }
>  
>         nested_svm_transition_tlb_flush(vcpu);
> @@ -1003,8 +1003,11 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
>         vmcb12->control.event_inj         = svm-
> >nested.ctl.event_inj;
>         vmcb12->control.event_inj_err     = svm-
> >nested.ctl.event_inj_err;
>  
> -       if (!kvm_pause_in_guest(vcpu->kvm) && vmcb02-
> >control.pause_filter_count)
> +       if (!kvm_pause_in_guest(vcpu->kvm)) {
>                 vmcb01->control.pause_filter_count = vmcb02-
> >control.pause_filter_count;
> +               vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS);
> +
> +       }
>  
>         nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm-
> >vmcb01.ptr);
>  
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 1bd42e7dfa36..4aea82f668fb 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -956,7 +956,7 @@ static void grow_ple_window(struct kvm_vcpu
> *vcpu)
>         struct vmcb_control_area *control = &svm->vmcb->control;
>         int old = control->pause_filter_count;
>  
> -       if (kvm_pause_in_guest(vcpu->kvm) || !old)
> +       if (kvm_pause_in_guest(vcpu->kvm))
>                 return;
>  
>         control->pause_filter_count = __grow_ple_window(old,
> @@ -977,7 +977,7 @@ static void shrink_ple_window(struct kvm_vcpu
> *vcpu)
>         struct vmcb_control_area *control = &svm->vmcb->control;
>         int old = control->pause_filter_count;
>  
> -       if (kvm_pause_in_guest(vcpu->kvm) || !old)
> +       if (kvm_pause_in_guest(vcpu->kvm))
>                 return;
>  
>         control->pause_filter_count =

Thank you Paolo!

Best regards,
Maxim Levitsky