[PATCH 16/19] KVM: x86: Enable CET virtualization for VMX and advertise CET to userspace
From: Yang Weijiang
Date: Thu Jun 16 2022 - 04:50:32 EST
Set the feature bits so that CET capabilities can be seen in guest via
CPUID enumeration. Add CR4.CET bit support in order to allow guest set CET
master control bit(CR4.CET).
Disable KVM CET feature if unrestricted_guest is unsupported/disabled as
KVM does not support emulating CET.
Don't expose CET feature if dependent CET bits are cleared in host XSS,
or if XSAVES isn't supported. Updating the CET features in common x86 is
a little ugly, but there is no clean solution without risking breakage of
SVM if SVM hardware ever gains support for CET, e.g. moving everything to
common x86 would prematurely expose CET on SVM. The alternative is to
put all the logic in VMX, but that means rereading host_xss in VMX and
duplicating the XSAVES check across VMX and SVM.
Co-developed-by: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
Signed-off-by: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
Signed-off-by: Yang Weijiang <weijiang.yang@xxxxxxxxx>
---
arch/x86/include/asm/kvm_host.h | 3 ++-
arch/x86/kvm/cpuid.c | 5 +++--
arch/x86/kvm/vmx/capabilities.h | 4 ++++
arch/x86/kvm/vmx/vmx.c | 37 ++++++++++++++++++++++++++++-----
arch/x86/kvm/x86.c | 21 ++++++++++++++++++-
5 files changed, 61 insertions(+), 9 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7e98b2876380..a7e5463d0107 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -120,7 +120,8 @@
| X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
| X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
| X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
- | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP))
+ | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP \
+ | X86_CR4_CET))
#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 46ca0f1abbcb..12d527e612e5 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -581,7 +581,7 @@ void kvm_set_cpu_caps(void)
F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/ |
- F(SGX_LC) | F(BUS_LOCK_DETECT)
+ F(SGX_LC) | F(BUS_LOCK_DETECT) | F(SHSTK)
);
/* Set LA57 based on hardware capability. */
if (cpuid_ecx(7) & F(LA57))
@@ -599,7 +599,8 @@ void kvm_set_cpu_caps(void)
F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) |
F(SERIALIZE) | F(TSXLDTRK) | F(AVX512_FP16) |
- F(AMX_TILE) | F(AMX_INT8) | F(AMX_BF16)
+ F(AMX_TILE) | F(AMX_INT8) | F(AMX_BF16) |
+ F(IBT)
);
/* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. */
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 069d8d298e1d..6849888f7b46 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -106,6 +106,10 @@ static inline bool cpu_has_load_perf_global_ctrl(void)
return vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
}
+static inline bool cpu_has_load_cet_ctrl(void)
+{
+ return (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_CET_STATE);
+}
static inline bool cpu_has_vmx_mpx(void)
{
return vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index fc1229f23987..4bdede87669a 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2516,6 +2516,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
{ VM_ENTRY_LOAD_IA32_EFER, VM_EXIT_LOAD_IA32_EFER },
{ VM_ENTRY_LOAD_BNDCFGS, VM_EXIT_CLEAR_BNDCFGS },
{ VM_ENTRY_LOAD_IA32_RTIT_CTL, VM_EXIT_CLEAR_IA32_RTIT_CTL },
+ { VM_ENTRY_LOAD_CET_STATE, VM_EXIT_LOAD_CET_STATE },
};
memset(vmcs_conf, 0, sizeof(*vmcs_conf));
@@ -2636,7 +2637,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
VM_EXIT_LOAD_IA32_EFER |
VM_EXIT_CLEAR_BNDCFGS |
VM_EXIT_PT_CONCEAL_PIP |
- VM_EXIT_CLEAR_IA32_RTIT_CTL;
+ VM_EXIT_CLEAR_IA32_RTIT_CTL |
+ VM_EXIT_LOAD_CET_STATE;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
&_vmexit_control) < 0)
return -EIO;
@@ -2660,7 +2662,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
VM_ENTRY_LOAD_IA32_EFER |
VM_ENTRY_LOAD_BNDCFGS |
VM_ENTRY_PT_CONCEAL_PIP |
- VM_ENTRY_LOAD_IA32_RTIT_CTL;
+ VM_ENTRY_LOAD_IA32_RTIT_CTL |
+ VM_ENTRY_LOAD_CET_STATE;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
&_vmentry_control) < 0)
return -EIO;
@@ -2705,7 +2708,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
}
}
-
rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high);
/* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
@@ -6159,6 +6161,12 @@ void dump_vmcs(struct kvm_vcpu *vcpu)
if (vmcs_read32(VM_EXIT_MSR_STORE_COUNT) > 0)
vmx_dump_msrs("guest autostore", &vmx->msr_autostore.guest);
+ if (vmentry_ctl & VM_ENTRY_LOAD_CET_STATE) {
+ pr_err("S_CET = 0x%016lx\n", vmcs_readl(GUEST_S_CET));
+ pr_err("SSP = 0x%016lx\n", vmcs_readl(GUEST_SSP));
+ pr_err("SSP TABLE = 0x%016lx\n",
+ vmcs_readl(GUEST_INTR_SSP_TABLE));
+ }
pr_err("*** Host State ***\n");
pr_err("RIP = 0x%016lx RSP = 0x%016lx\n",
vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP));
@@ -6236,6 +6244,12 @@ void dump_vmcs(struct kvm_vcpu *vcpu)
if (secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID)
pr_err("Virtual processor ID = 0x%04x\n",
vmcs_read16(VIRTUAL_PROCESSOR_ID));
+ if (vmexit_ctl & VM_EXIT_LOAD_CET_STATE) {
+ pr_err("S_CET = 0x%016lx\n", vmcs_readl(HOST_S_CET));
+ pr_err("SSP = 0x%016lx\n", vmcs_readl(HOST_SSP));
+ pr_err("SSP TABLE = 0x%016lx\n",
+ vmcs_readl(HOST_INTR_SSP_TABLE));
+ }
}
/*
@@ -7679,9 +7693,10 @@ static __init void vmx_set_cpu_caps(void)
kvm_cpu_cap_set(X86_FEATURE_UMIP);
/* CPUID 0xD.1 */
- kvm_caps.supported_xss = 0;
- if (!cpu_has_vmx_xsaves())
+ if (!cpu_has_vmx_xsaves()) {
kvm_cpu_cap_clear(X86_FEATURE_XSAVES);
+ kvm_caps.supported_xss = 0;
+ }
/* CPUID 0x80000001 and 0x7 (RDPID) */
if (!cpu_has_vmx_rdtscp()) {
@@ -7691,6 +7706,18 @@ static __init void vmx_set_cpu_caps(void)
if (cpu_has_vmx_waitpkg())
kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);
+
+ if (!cpu_has_load_cet_ctrl() || !enable_unrestricted_guest) {
+ kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
+ kvm_cpu_cap_clear(X86_FEATURE_IBT);
+ kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_USER;
+ }
+
+#ifndef CONFIG_X86_SHADOW_STACK
+ if (boot_cpu_has(X86_FEATURE_SHSTK))
+ kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
+#endif
+
}
static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 86bccb12f036..fe049d0e5ecc 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -217,6 +217,8 @@ static struct kvm_user_return_msrs __percpu *user_return_msrs;
| XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512 \
| XFEATURE_MASK_PKRU | XFEATURE_MASK_XTILE)
+#define KVM_SUPPORTED_XSS (XFEATURE_MASK_CET_USER)
+
u64 __read_mostly host_efer;
EXPORT_SYMBOL_GPL(host_efer);
@@ -11807,8 +11809,10 @@ int kvm_arch_hardware_setup(void *opaque)
rdmsrl_safe(MSR_EFER, &host_efer);
- if (boot_cpu_has(X86_FEATURE_XSAVES))
+ if (boot_cpu_has(X86_FEATURE_XSAVES)) {
rdmsrl(MSR_IA32_XSS, host_xss);
+ kvm_caps.supported_xss = host_xss & KVM_SUPPORTED_XSS;
+ }
kvm_init_pmu_capability();
@@ -11823,6 +11827,21 @@ int kvm_arch_hardware_setup(void *opaque)
if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
kvm_caps.supported_xss = 0;
+ /* Update CET features now as kvm_caps.supported_xss is finalized. */
+ if (!kvm_cet_user_supported()) {
+ kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
+ kvm_cpu_cap_clear(X86_FEATURE_IBT);
+ }
+
+ /*
+ * If SHSTK and IBT are disabled either by user space or unselection in
+ * Kconfig,then the feature bits should have been removed from KVM caps
+ * by this point, clear CET user bit in kvm_caps.supported_xss too.
+ */
+ if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) &&
+ !kvm_cpu_cap_has(X86_FEATURE_IBT))
+ kvm_caps.supported_xss &= ~XFEATURE_CET_USER;
+
#define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f)
cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_);
#undef __kvm_cpu_cap_has
--
2.27.0