RE: [PATCH v2 09/25] KVM: VMX: Switch FRED RSP0 between host and guest

From: Li, Xin3
Date: Thu Jul 18 2024 - 13:44:58 EST


> > thus we need to:
> >
> > 1) *always* save guest FRED RSP0 in vmx_prepare_switch_to_host().
> >
> > 2) *always* restore guest FRED RSP0 in vmx_prepare_switch_to_guest(),
> > because sometimes context switches happen but the CPU does NOT
> > return to user mode thus the user return framework detects no change.
> >
> > So it essentially becomes the same as what the original patch does.
> >
> > I guess It's probably not worth the change, how do you think?
>
> One idea would be to have the kernel write MSR_IA32_FRED_RSP0 on return to
> userspace instead of on context switch. As is, amusingly, there's no need to
> restore the host value if a context switch occurs as the kernel will have written
> the new task's value. RSP0 only needs to be manually restored if the kernel
> returns to userspace for the vCPU task. Using a TI flag to track if RSP0 needs to
> be loaded would avoid a fair number of WRMSRs in both KVM and the kernel.

I also thought about it (while in the FRED ERETU code path), however
we will need performance data first, even an extra check
(ti_work & _TIF_NEED_RSP0_LOAD) seems neglectable, but both return to
user and context switch are hot paths, and I assume return to user is
extremely hot.

So let's revisit it at a later time?

> diff --git a/arch/x86/include/asm/entry-common.h
> b/arch/x86/include/asm/entry-common.h
> index ce8f50192ae3..76724cc42869 100644
> --- a/arch/x86/include/asm/entry-common.h
> +++ b/arch/x86/include/asm/entry-common.h
> @@ -57,6 +57,11 @@ static inline void arch_exit_to_user_mode_prepare(struct
> pt_regs *regs,
> if (unlikely(ti_work & _TIF_NEED_FPU_LOAD))
> switch_fpu_return();
>
> + if (cpu_feature_enabled(X86_FEATURE_FRED) &&
> + (ti_work & _TIF_NEED_RSP0_LOAD))
> + wrmsrns(MSR_IA32_FRED_RSP0,
> + (unsigned long)task_stack_page(current) +
> + THREAD_SIZE);
> +
> #ifdef CONFIG_COMPAT
> /*
> * Compat syscalls set TS_COMPAT. Make sure we clear it before diff --git
> a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
> index c3bd0c0758c9..1674d98a8850 100644
> --- a/arch/x86/include/asm/switch_to.h
> +++ b/arch/x86/include/asm/switch_to.h
> @@ -71,8 +71,7 @@ static inline void update_task_stack(struct task_struct *task)
> this_cpu_write(cpu_tss_rw.x86_tss.sp1, task->thread.sp0); #else
> if (cpu_feature_enabled(X86_FEATURE_FRED)) {
> - /* WRMSRNS is a baseline feature for FRED. */
> - wrmsrns(MSR_IA32_FRED_RSP0, (unsigned long)task_stack_page(task) +
> THREAD_SIZE);
> + set_thread_flag(TIF_NEED_RSP0_LOAD);
> } else if (cpu_feature_enabled(X86_FEATURE_XENPV)) {
> /* Xen PV enters the kernel on the thread stack. */
> load_sp0(task_top_of_stack(task));
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index
> 5c6bb26463e8..cb7e3bcb001f 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -1338,15 +1338,9 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu
> *vcpu)
>
> wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
>
> - if (guest_can_use(vcpu, X86_FEATURE_FRED)) {
> - /*
> - * MSR_IA32_FRED_RSP0 is top of task stack, which never changes.
> - * Thus it should be initialized only once.
> - */
> - if (unlikely(vmx->msr_host_fred_rsp0 == 0))
> - vmx->msr_host_fred_rsp0 = read_msr(MSR_IA32_FRED_RSP0);
> - wrmsrl(MSR_IA32_FRED_RSP0, vmx->msr_guest_fred_rsp0);
> - }
> + if (cpu_feature_enabled(X86_FEATURE_FRED) &&

This check seems unnecessary, I guess you add it to skip the following
code when FRED is not enabled on host?

> + guest_can_use(vcpu, X86_FEATURE_FRED))
> + wrmsrns(MSR_IA32_FRED_RSP0, vmx->msr_guest_fred_rsp0);
> #else
> savesegment(fs, fs_sel);
> savesegment(gs, gs_sel);
> @@ -1392,9 +1386,10 @@ static void vmx_prepare_switch_to_host(struct
> vcpu_vmx *vmx) #ifdef CONFIG_X86_64
> wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
>
> - if (guest_can_use(&vmx->vcpu, X86_FEATURE_FRED)) {
> + if (cpu_feature_enabled(X86_FEATURE_FRED) &&
> + guest_can_use(&vmx->vcpu, X86_FEATURE_FRED)) {
> vmx->msr_guest_fred_rsp0 = read_msr(MSR_IA32_FRED_RSP0);
> - wrmsrl(MSR_IA32_FRED_RSP0, vmx->msr_host_fred_rsp0);
> + set_thread_flag(TIF_NEED_RSP0_LOAD);
> }
> #endif
> load_fixmap_gdt(raw_smp_processor_id());