Re: [PATCH v2 01/11] KVM: nSVM: Sync next_rip field from vmcb12 to vmcb02

From: Maxim Levitsky
Date: Thu Apr 28 2022 - 05:46:19 EST


On Sat, 2022-04-23 at 02:14 +0000, Sean Christopherson wrote:
> From: Maciej S. Szmigiero <maciej.szmigiero@xxxxxxxxxx>
>
> The next_rip field of a VMCB is *not* an output-only field for a VMRUN.
> This field value (instead of the saved guest RIP) is used by the CPU for
> the return address pushed on stack when injecting a software interrupt or
> INT3 or INTO exception.
>
> Make sure this field gets synced from vmcb12 to vmcb02 when entering L2 or
> loading a nested state and NRIPS is exposed to L1. If NRIPS is supported
> in hardware but not exposed to L1 (nrips=0 or hidden by userspace), stuff
> vmcb02's next_rip from the new L2 RIP to emulate a !NRIPS CPU (which
> saves RIP on the stack as-is).
>
> Reviewed-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx>
> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@xxxxxxxxxx>
> Co-developed-by: Sean Christopherson <seanjc@xxxxxxxxxx>
> Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
> ---
> arch/x86/kvm/svm/nested.c | 22 +++++++++++++++++++---
> arch/x86/kvm/svm/svm.h | 1 +
> 2 files changed, 20 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index bed5e1692cef..461c5f247801 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -371,6 +371,7 @@ void __nested_copy_vmcb_control_to_cache(struct kvm_vcpu *vcpu,
> to->nested_ctl = from->nested_ctl;
> to->event_inj = from->event_inj;
> to->event_inj_err = from->event_inj_err;
> + to->next_rip = from->next_rip;
> to->nested_cr3 = from->nested_cr3;
> to->virt_ext = from->virt_ext;
> to->pause_filter_count = from->pause_filter_count;
> @@ -608,7 +609,8 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
> }
> }
>
> -static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
> +static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
> + unsigned long vmcb12_rip)

I know that I already reviewed this, but why do we need to pass an extra
parameter to nested_vmcb02_prepare_control()?
Let's just put that value in the cache to be consistent with the rest?

Best regards,
Maxim Levitsky


> {
> u32 int_ctl_vmcb01_bits = V_INTR_MASKING_MASK;
> u32 int_ctl_vmcb12_bits = V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK;
> @@ -662,6 +664,19 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
> vmcb02->control.event_inj = svm->nested.ctl.event_inj;
> vmcb02->control.event_inj_err = svm->nested.ctl.event_inj_err;
>
> + /*
> + * next_rip is consumed on VMRUN as the return address pushed on the
> + * stack for injected soft exceptions/interrupts. If nrips is exposed
> + * to L1, take it verbatim from vmcb12. If nrips is supported in
> + * hardware but not exposed to L1, stuff the actual L2 RIP to emulate
> + * what a nrips=0 CPU would do (L1 is responsible for advancing RIP
> + * prior to injecting the event).
> + */
> + if (svm->nrips_enabled)
> + vmcb02->control.next_rip = svm->nested.ctl.next_rip;
> + else if (boot_cpu_has(X86_FEATURE_NRIPS))
> + vmcb02->control.next_rip = vmcb12_rip;
> +
> vmcb02->control.virt_ext = vmcb01->control.virt_ext &
> LBR_CTL_ENABLE_MASK;
> if (svm->lbrv_enabled)
> @@ -745,7 +760,7 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
> nested_svm_copy_common_state(svm->vmcb01.ptr, svm->nested.vmcb02.ptr);
>
> svm_switch_vmcb(svm, &svm->nested.vmcb02);
> - nested_vmcb02_prepare_control(svm);
> + nested_vmcb02_prepare_control(svm, vmcb12->save.rip);
> nested_vmcb02_prepare_save(svm, vmcb12);
>
> ret = nested_svm_load_cr3(&svm->vcpu, svm->nested.save.cr3,
> @@ -1418,6 +1433,7 @@ static void nested_copy_vmcb_cache_to_control(struct vmcb_control_area *dst,
> dst->nested_ctl = from->nested_ctl;
> dst->event_inj = from->event_inj;
> dst->event_inj_err = from->event_inj_err;
> + dst->next_rip = from->next_rip;
> dst->nested_cr3 = from->nested_cr3;
> dst->virt_ext = from->virt_ext;
> dst->pause_filter_count = from->pause_filter_count;
> @@ -1602,7 +1618,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
> nested_copy_vmcb_control_to_cache(svm, ctl);
>
> svm_switch_vmcb(svm, &svm->nested.vmcb02);
> - nested_vmcb02_prepare_control(svm);
> + nested_vmcb02_prepare_control(svm, save->rip);
>
> /*
> * While the nested guest CR3 is already checked and set by
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index 32220a1b0ea2..7d97e4d18c8b 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -139,6 +139,7 @@ struct vmcb_ctrl_area_cached {
> u64 nested_ctl;
> u32 event_inj;
> u32 event_inj_err;
> + u64 next_rip;
> u64 nested_cr3;
> u64 virt_ext;
> u32 clean;