Re: [PATCH v7 24/26] KVM: x86: Enable CET virtualization for VMX and advertise to userspace

From: Maxim Levitsky
Date: Tue Dec 05 2023 - 05:07:59 EST


On Sat, 2023-12-02 at 00:15 +0800, Yang, Weijiang wrote:
> On 12/1/2023 1:46 AM, Maxim Levitsky wrote:
>
> [...]
>
> > >
> > > +static inline bool cpu_has_vmx_basic_no_hw_errcode(void)
> > > +{
> > > + return ((u64)vmcs_config.basic_cap << 32) &
> > > + VMX_BASIC_NO_HW_ERROR_CODE_CC;
> > > +}
> > I still think that we should add a comment explaining why this check is needed,
> > as I said in the previous review.
>
> OK, I'll add some comments above the function. Thanks!
>
> > > +
> > > static inline bool cpu_has_virtual_nmis(void)
> > > {
> > > return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS &&
> > > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> > > index c658f2f230df..a1aae8709939 100644
> > > --- a/arch/x86/kvm/vmx/vmx.c
> > > +++ b/arch/x86/kvm/vmx/vmx.c
> > > @@ -2614,6 +2614,7 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
> > > { VM_ENTRY_LOAD_IA32_EFER, VM_EXIT_LOAD_IA32_EFER },
> > > { VM_ENTRY_LOAD_BNDCFGS, VM_EXIT_CLEAR_BNDCFGS },
> > > { VM_ENTRY_LOAD_IA32_RTIT_CTL, VM_EXIT_CLEAR_IA32_RTIT_CTL },
> > > + { VM_ENTRY_LOAD_CET_STATE, VM_EXIT_LOAD_CET_STATE },
> > > };
> > >
> > > memset(vmcs_conf, 0, sizeof(*vmcs_conf));
> > > @@ -4935,6 +4936,15 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
> > >
> > > vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */
> > >
> > > + if (kvm_cpu_cap_has(X86_FEATURE_SHSTK))
> > > + vmcs_writel(GUEST_SSP, 0);
> > > + if (kvm_cpu_cap_has(X86_FEATURE_SHSTK) ||
> > > + kvm_cpu_cap_has(X86_FEATURE_IBT))
> > > + vmcs_writel(GUEST_S_CET, 0);
> > > + if (kvm_cpu_cap_has(X86_FEATURE_SHSTK) &&
> > > + IS_ENABLED(CONFIG_X86_64))
> > > + vmcs_writel(GUEST_INTR_SSP_TABLE, 0);
> > Looks reasonable now.
> > > +
> > > kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
> > >
> > > vpid_sync_context(vmx->vpid);
> > > @@ -6354,6 +6364,12 @@ void dump_vmcs(struct kvm_vcpu *vcpu)
> > > if (vmcs_read32(VM_EXIT_MSR_STORE_COUNT) > 0)
> > > vmx_dump_msrs("guest autostore", &vmx->msr_autostore.guest);
> > >
> > > + if (vmentry_ctl & VM_ENTRY_LOAD_CET_STATE) {
> > > + pr_err("S_CET = 0x%016lx\n", vmcs_readl(GUEST_S_CET));
> > > + pr_err("SSP = 0x%016lx\n", vmcs_readl(GUEST_SSP));
> > > + pr_err("INTR SSP TABLE = 0x%016lx\n",
> > > + vmcs_readl(GUEST_INTR_SSP_TABLE));
> > > + }
> > > pr_err("*** Host State ***\n");
> > > pr_err("RIP = 0x%016lx RSP = 0x%016lx\n",
> > > vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP));
> > > @@ -6431,6 +6447,12 @@ void dump_vmcs(struct kvm_vcpu *vcpu)
> > > if (secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID)
> > > pr_err("Virtual processor ID = 0x%04x\n",
> > > vmcs_read16(VIRTUAL_PROCESSOR_ID));
> > > + if (vmexit_ctl & VM_EXIT_LOAD_CET_STATE) {
> > > + pr_err("S_CET = 0x%016lx\n", vmcs_readl(HOST_S_CET));
> > > + pr_err("SSP = 0x%016lx\n", vmcs_readl(HOST_SSP));
> > > + pr_err("INTR SSP TABLE = 0x%016lx\n",
> > > + vmcs_readl(HOST_INTR_SSP_TABLE));
> > > + }
> > > }
> > >
> > > /*
> > > @@ -7964,7 +7986,6 @@ static __init void vmx_set_cpu_caps(void)
> > > kvm_cpu_cap_set(X86_FEATURE_UMIP);
> > >
> > > /* CPUID 0xD.1 */
> > > - kvm_caps.supported_xss = 0;
> > > if (!cpu_has_vmx_xsaves())
> > > kvm_cpu_cap_clear(X86_FEATURE_XSAVES);
> > >
> > > @@ -7976,6 +7997,12 @@ static __init void vmx_set_cpu_caps(void)
> > >
> > > if (cpu_has_vmx_waitpkg())
> > > kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);
> > > +
> > > + if (!cpu_has_load_cet_ctrl() || !enable_unrestricted_guest ||
> > > + !cpu_has_vmx_basic_no_hw_errcode()) {
> > > + kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
> > > + kvm_cpu_cap_clear(X86_FEATURE_IBT);
> > > + }
> > My review feedback from previous version still applies here, I don't have an
> > idea why this was not addressed....
> >
> > "I think that here we also need to clear kvm_caps.supported_xss,
> > or even better, lets set the CET bits in kvm_caps.supported_xss only
> > once CET is fully enabled (both this check and check in __kvm_x86_vendor_init pass)."
>
> Ah, previously I had a helper to check whether CET bits were enabled in kvm_caps.supported_xss,
> so need to set the bits earlier before vmx's hardware_setup. I still want to keep the code as-is
> in case other features need to check the their related bits set before configure something in
> vmx hardware_setup.

As long as the code is correct I won't object.


Best regards,
Maxim Levitsky

>
> > In addition to that I just checked and unless I am mistaken:
> >
> > vmx_set_cpu_caps() is called from vmx's hardware_setup(), which is called
> > from __kvm_x86_vendor_init.
> >
> > After this call, __kvm_x86_vendor_init does clear kvm_caps.supported_xss,
> > but doesn't do this if the above code cleared X86_FEATURE_SHSTK/X86_FEATURE_IBT.
> >
> Yeah, I checked the history, the similar logic was there until v6, I can pick it up, thanks!
>
> > > }
> > >
> > > static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
> > > diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> > > index c2130d2c8e24..fb72819fbb41 100644
> > > --- a/arch/x86/kvm/vmx/vmx.h
> > > +++ b/arch/x86/kvm/vmx/vmx.h
> > > @@ -480,7 +480,8 @@ static inline u8 vmx_get_rvi(void)
> > > VM_ENTRY_LOAD_IA32_EFER | \
> > > VM_ENTRY_LOAD_BNDCFGS | \
> > > VM_ENTRY_PT_CONCEAL_PIP | \
> > > - VM_ENTRY_LOAD_IA32_RTIT_CTL)
> > > + VM_ENTRY_LOAD_IA32_RTIT_CTL | \
> > > + VM_ENTRY_LOAD_CET_STATE)
> > >
> > > #define __KVM_REQUIRED_VMX_VM_EXIT_CONTROLS \
> > > (VM_EXIT_SAVE_DEBUG_CONTROLS | \
> > > @@ -502,7 +503,8 @@ static inline u8 vmx_get_rvi(void)
> > > VM_EXIT_LOAD_IA32_EFER | \
> > > VM_EXIT_CLEAR_BNDCFGS | \
> > > VM_EXIT_PT_CONCEAL_PIP | \
> > > - VM_EXIT_CLEAR_IA32_RTIT_CTL)
> > > + VM_EXIT_CLEAR_IA32_RTIT_CTL | \
> > > + VM_EXIT_LOAD_CET_STATE)
> > >
> > > #define KVM_REQUIRED_VMX_PIN_BASED_VM_EXEC_CONTROL \
> > > (PIN_BASED_EXT_INTR_MASK | \
> > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > > index c6b57ede0f57..2bcf3c7923bf 100644
> > > --- a/arch/x86/kvm/x86.c
> > > +++ b/arch/x86/kvm/x86.c
> > > @@ -231,7 +231,8 @@ static struct kvm_user_return_msrs __percpu *user_return_msrs;
> > > | XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512 \
> > > | XFEATURE_MASK_PKRU | XFEATURE_MASK_XTILE)
> > >
> > > -#define KVM_SUPPORTED_XSS 0
> > > +#define KVM_SUPPORTED_XSS (XFEATURE_MASK_CET_USER | \
> > > + XFEATURE_MASK_CET_KERNEL)
> > >
> > > u64 __read_mostly host_efer;
> > > EXPORT_SYMBOL_GPL(host_efer);
> > > @@ -9854,6 +9855,15 @@ static int __kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
> > > if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
> > > kvm_caps.supported_xss = 0;
> > >
> > > + if ((kvm_caps.supported_xss & (XFEATURE_MASK_CET_USER |
> > > + XFEATURE_MASK_CET_KERNEL)) !=
> > > + (XFEATURE_MASK_CET_USER | XFEATURE_MASK_CET_KERNEL)) {
> > > + kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
> > > + kvm_cpu_cap_clear(X86_FEATURE_IBT);
> > > + kvm_caps.supported_xss &= ~XFEATURE_CET_USER;
> > > + kvm_caps.supported_xss &= ~XFEATURE_CET_KERNEL;
> > > + }
> > > +
> > > #define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f)
> > > cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_);
> > > #undef __kvm_cpu_cap_has
> > > @@ -12319,7 +12329,9 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
> > >
> > > static inline bool is_xstate_reset_needed(void)
> > > {
> > > - return kvm_cpu_cap_has(X86_FEATURE_MPX);
> > > + return kvm_cpu_cap_has(X86_FEATURE_MPX) ||
> > > + kvm_cpu_cap_has(X86_FEATURE_SHSTK) ||
> > > + kvm_cpu_cap_has(X86_FEATURE_IBT);
> > > }
> > >
> > > void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
> > > @@ -12396,6 +12408,16 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
> > > XFEATURE_BNDCSR);
> > > }
> > >
> > > + if (kvm_cpu_cap_has(X86_FEATURE_SHSTK)) {
> > > + fpstate_clear_xstate_component(fpstate,
> > > + XFEATURE_CET_USER);
> > > + fpstate_clear_xstate_component(fpstate,
> > > + XFEATURE_CET_KERNEL);
> > > + } else if (kvm_cpu_cap_has(X86_FEATURE_IBT)) {
> > > + fpstate_clear_xstate_component(fpstate,
> > > + XFEATURE_CET_USER);
> > > + }
> > > +
> > > if (init_event)
> > > kvm_load_guest_fpu(vcpu);
> > > }
> > > diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> > > index d9cc352cf421..dc79dcd733ac 100644
> > > --- a/arch/x86/kvm/x86.h
> > > +++ b/arch/x86/kvm/x86.h
> > > @@ -531,6 +531,9 @@ bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type);
> > > __reserved_bits |= X86_CR4_VMXE; \
> > > if (!__cpu_has(__c, X86_FEATURE_PCID)) \
> > > __reserved_bits |= X86_CR4_PCIDE; \
> > > + if (!__cpu_has(__c, X86_FEATURE_SHSTK) && \
> > > + !__cpu_has(__c, X86_FEATURE_IBT)) \
> > > + __reserved_bits |= X86_CR4_CET; \
> > > __reserved_bits; \
> > > })
> > >
> >
> > Best regards,
> > Maxim Levitsky
> >
> >
> >