Re: [PATCH] KVM: x86: SVM: fix nested PAUSE filtering when L0 intercepts PAUSE
From: Maxim Levitsky
Date: Tue Jun 07 2022 - 04:28:09 EST
On Tue, 2022-05-31 at 13:58 -0400, Paolo Bonzini wrote:
> Commit 74fd41ed16fd ("KVM: x86: nSVM: support PAUSE filtering when L0
> doesn't intercept PAUSE") introduced passthrough support for nested
> pause
> filtering, (when the host doesn't intercept PAUSE) (either disabled
> with
> kvm module param, or disabled with '-overcommit cpu-pm=on')
>
> Before this commit, L1 KVM didn't intercept PAUSE at all; afterwards,
> the feature was exposed as supported by KVM cpuid unconditionally,
> thus
> if L1 could try to use it even when the L0 KVM can't really support
> it.
>
> In this case the fallback caused KVM to intercept each PAUSE
> instruction;
> in some cases, such intercept can slow down the nested guest so much
> that it can fail to boot. Instead, before the problematic commit KVM
> was already setting both thresholds to 0 in vmcb02, but after the
> first
> userspace VM exit shrink_ple_window was called and would reset the
> pause_filter_count to the default value.
>
> To fix this, change the fallback strategy - ignore the guest
> threshold
> values, but use/update the host threshold values unless the guest
> specifically requests disabling PAUSE filtering (either simple or
> advanced).
>
> Also fix a minor bug: on nested VM exit, when PAUSE filter counter
> were copied back to vmcb01, a dirty bit was not set.
>
> Thanks a lot to Suravee Suthikulpanit for debugging this!
>
> Fixes: 74fd41ed16fd ("KVM: x86: nSVM: support PAUSE filtering when L0
> doesn't intercept PAUSE")
> Reported-by: Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx>
> Tested-by: Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx>
> Co-developed-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx>
> Message-Id: <20220518072709.730031-1-mlevitsk@xxxxxxxxxx>
> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> ---
> arch/x86/kvm/svm/nested.c | 39 +++++++++++++++++++++----------------
> --
> arch/x86/kvm/svm/svm.c | 4 ++--
> 2 files changed, 23 insertions(+), 20 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index 6d0233a2469e..88da8edbe1e1 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -642,6 +642,8 @@ static void nested_vmcb02_prepare_control(struct
> vcpu_svm *svm,
> struct kvm_vcpu *vcpu = &svm->vcpu;
> struct vmcb *vmcb01 = svm->vmcb01.ptr;
> struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
> + u32 pause_count12;
> + u32 pause_thresh12;
>
> /*
> * Filled at exit: exit_code, exit_code_hi, exit_info_1,
> exit_info_2,
> @@ -721,27 +723,25 @@ static void
> nested_vmcb02_prepare_control(struct vcpu_svm *svm,
> if (!nested_vmcb_needs_vls_intercept(svm))
> vmcb02->control.virt_ext |=
> VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
>
> + pause_count12 = svm->pause_filter_enabled ? svm-
> >nested.ctl.pause_filter_count : 0;
> + pause_thresh12 = svm->pause_threshold_enabled ? svm-
> >nested.ctl.pause_filter_thresh : 0;
> if (kvm_pause_in_guest(svm->vcpu.kvm)) {
> - /* use guest values since host doesn't use them */
> - vmcb02->control.pause_filter_count =
> - svm->pause_filter_enabled ?
> - svm->nested.ctl.pause_filter_count :
> 0;
> + /* use guest values since host doesn't intercept
> PAUSE */
> + vmcb02->control.pause_filter_count = pause_count12;
> + vmcb02->control.pause_filter_thresh = pause_thresh12;
>
> - vmcb02->control.pause_filter_thresh =
> - svm->pause_threshold_enabled ?
> - svm->nested.ctl.pause_filter_thresh :
> 0;
> -
> - } else if (!vmcb12_is_intercept(&svm->nested.ctl,
> INTERCEPT_PAUSE)) {
> - /* use host values when guest doesn't use them */
> + } else {
> + /* start from host values otherwise */
> vmcb02->control.pause_filter_count = vmcb01-
> >control.pause_filter_count;
> vmcb02->control.pause_filter_thresh = vmcb01-
> >control.pause_filter_thresh;
> - } else {
> - /*
> - * Intercept every PAUSE otherwise and
> - * ignore both host and guest values
> - */
> - vmcb02->control.pause_filter_count = 0;
> - vmcb02->control.pause_filter_thresh = 0;
> +
> + /* ... but ensure filtering is disabled if so
> requested. */
> + if (vmcb12_is_intercept(&svm->nested.ctl,
> INTERCEPT_PAUSE)) {
> + if (!pause_count12)
> + vmcb02->control.pause_filter_count =
> 0;
> + if (!pause_thresh12)
> + vmcb02->control.pause_filter_thresh =
> 0;
> + }
> }
>
> nested_svm_transition_tlb_flush(vcpu);
> @@ -1003,8 +1003,11 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
> vmcb12->control.event_inj = svm-
> >nested.ctl.event_inj;
> vmcb12->control.event_inj_err = svm-
> >nested.ctl.event_inj_err;
>
> - if (!kvm_pause_in_guest(vcpu->kvm) && vmcb02-
> >control.pause_filter_count)
> + if (!kvm_pause_in_guest(vcpu->kvm)) {
> vmcb01->control.pause_filter_count = vmcb02-
> >control.pause_filter_count;
> + vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS);
> +
> + }
>
> nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm-
> >vmcb01.ptr);
>
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 1bd42e7dfa36..4aea82f668fb 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -956,7 +956,7 @@ static void grow_ple_window(struct kvm_vcpu
> *vcpu)
> struct vmcb_control_area *control = &svm->vmcb->control;
> int old = control->pause_filter_count;
>
> - if (kvm_pause_in_guest(vcpu->kvm) || !old)
> + if (kvm_pause_in_guest(vcpu->kvm))
> return;
>
> control->pause_filter_count = __grow_ple_window(old,
> @@ -977,7 +977,7 @@ static void shrink_ple_window(struct kvm_vcpu
> *vcpu)
> struct vmcb_control_area *control = &svm->vmcb->control;
> int old = control->pause_filter_count;
>
> - if (kvm_pause_in_guest(vcpu->kvm) || !old)
> + if (kvm_pause_in_guest(vcpu->kvm))
> return;
>
> control->pause_filter_count =
Thank you Paolo!
Best regards,
Maxim Levitsky