Re: [PATCH v2] KVM: SVM: use vmsave/vmload for saving/restoring additional host state

From: Sean Christopherson
Date: Mon Dec 14 2020 - 14:39:44 EST


+Andy, who provided a lot of feedback on v1.

On Mon, Dec 14, 2020, Michael Roth wrote:

Cc: Andy Lutomirski <luto@xxxxxxxxxx>

> Suggested-by: Tom Lendacky <thomas.lendacky@xxxxxxx>
> Signed-off-by: Michael Roth <michael.roth@xxxxxxx>
> ---
> v2:
> * rebase on latest kvm/next
> * move VMLOAD to just after vmexit so we can use it to handle all FS/GS
> host state restoration and rather than relying on loadsegment() and
> explicit write to MSR_GS_BASE (Andy)
> * drop 'host' field from struct vcpu_svm since it is no longer needed
> for storing FS/GS/LDT state (Andy)
> ---
> arch/x86/kvm/svm/svm.c | 44 ++++++++++++++++--------------------------
> arch/x86/kvm/svm/svm.h | 14 +++-----------
> 2 files changed, 20 insertions(+), 38 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 0e52fac4f5ae..fb15b7bd461f 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -1367,15 +1367,19 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
> vmcb_mark_all_dirty(svm->vmcb);
> }
>
> -#ifdef CONFIG_X86_64
> - rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_base);
> -#endif
> - savesegment(fs, svm->host.fs);
> - savesegment(gs, svm->host.gs);
> - svm->host.ldt = kvm_read_ldt();
> -
> - for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
> + for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) {
> rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
> + }

Unnecessary change that violates preferred coding style. Checkpatch explicitly
complains about this.

WARNING: braces {} are not necessary for single statement blocks
#132: FILE: arch/x86/kvm/svm/svm.c:1370:
+ for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) {
rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
+

> +
> + asm volatile(__ex("vmsave")
> + : : "a" (page_to_pfn(sd->save_area) << PAGE_SHIFT)

I'm pretty sure this can be page_to_phys().

> + : "memory");

I think we can defer this until we're actually planning on running the guest,
i.e. put this in svm_prepare_guest_switch().

> + /*
> + * Store a pointer to the save area to we can access it after
> + * vmexit for vmload. This is needed since per-cpu accesses
> + * won't be available until GS is restored as part of vmload
> + */
> + svm->host_save_area = sd->save_area;

Unless I'm missing something with how SVM loads guest state, you can avoid
adding host_save_area by saving the PA of the save area on the stack prior to
the vmload of guest state.

>
> if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
> u64 tsc_ratio = vcpu->arch.tsc_scaling_ratio;
> @@ -1403,18 +1407,10 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
> avic_vcpu_put(vcpu);
>
> ++vcpu->stat.host_state_reload;
> - kvm_load_ldt(svm->host.ldt);
> -#ifdef CONFIG_X86_64
> - loadsegment(fs, svm->host.fs);
> - wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gsbase);
> - load_gs_index(svm->host.gs);
> -#else
> -#ifdef CONFIG_X86_32_LAZY_GS
> - loadsegment(gs, svm->host.gs);
> -#endif
> -#endif
> - for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
> +
> + for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) {
> wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
> + }

Same bogus change and checkpatch warning.

> }
>
> static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
> @@ -3507,14 +3503,8 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu,
>
> __svm_vcpu_run(svm->vmcb_pa, (unsigned long *)&svm->vcpu.arch.regs);

Tying in with avoiding svm->host_save_area, what about passing in the PA of the
save area and doing the vmload in __svm_vcpu_run()? One less instance of inline
assembly to stare at...

>
> -#ifdef CONFIG_X86_64
> - native_wrmsrl(MSR_GS_BASE, svm->host.gs_base);
> -#else
> - loadsegment(fs, svm->host.fs);
> -#ifndef CONFIG_X86_32_LAZY_GS
> - loadsegment(gs, svm->host.gs);
> -#endif
> -#endif
> + asm volatile(__ex("vmload")
> + : : "a" (page_to_pfn(svm->host_save_area) << PAGE_SHIFT));
>
> /*
> * VMEXIT disables interrupts (host state), but tracing and lockdep
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index fdff76eb6ceb..bf01a8c19ec0 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -21,11 +21,6 @@
> #include <asm/svm.h>
>
> static const u32 host_save_user_msrs[] = {
> -#ifdef CONFIG_X86_64
> - MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
> - MSR_FS_BASE,
> -#endif
> - MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
> MSR_TSC_AUX,

With this being whittled down to TSC_AUX, a good follow-on series would be to
add SVM usage of the "user return" MSRs to handle TSC_AUX. If no one objects,
I'll plan on doing that in the not-too-distant future as a ramp task of sorts.

> };
>
> @@ -117,12 +112,9 @@ struct vcpu_svm {
> u64 next_rip;
>
> u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
> - struct {
> - u16 fs;
> - u16 gs;
> - u16 ldt;
> - u64 gs_base;
> - } host;
> +
> + /* set by vcpu_load(), for use when per-cpu accesses aren't available */
> + struct page *host_save_area;
>
> u64 spec_ctrl;
> /*
> --
> 2.25.1
>