[PATCH v2 49/49] *** DO NOT APPLY *** KVM: x86: Verify KVM initializes all consumed guest caps

From: Sean Christopherson
Date: Fri May 17 2024 - 13:54:45 EST


Assert that all features queried via guest_cpu_cap_has() are known to KVM,
i.e. that KVM doesn't check for a feature that can never actually be set.

This is for demonstration purposes only, as the proper way to enforce this
is to do post-processing at build time (and there are other shortcomings
of this PoC, e.g. it requires all KVM modules to be built-in).

Not-signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---
arch/x86/kvm/cpuid.c | 81 +++++++++++++++++++++++--------
arch/x86/kvm/cpuid.h | 16 +++++-
arch/x86/kvm/x86.c | 2 +
include/asm-generic/vmlinux.lds.h | 4 ++
4 files changed, 81 insertions(+), 22 deletions(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 0e64a6332052..18ded0e682f2 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -37,6 +37,7 @@ u32 kvm_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly;
EXPORT_SYMBOL_GPL(kvm_cpu_caps);

static u32 kvm_vmm_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly;
+static u32 kvm_known_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly;

u32 xstate_required_size(u64 xstate_bv, bool compacted)
{
@@ -143,6 +144,26 @@ u32 xstate_required_size(u64 xstate_bv, bool compacted)
0; \
})

+/*
+ * Vendor Features - For features that KVM supports, but that are added later
+ * (not here, hence the 0) because they require additional vendor enabling.
+ */
+#define VEND_F(name) \
+({ \
+ KVM_VALIDATE_CPU_CAP_USAGE(name); \
+ 0; \
+})
+
+/*
+ * Operating System Features - For features that KVM dynamically sets/clears at
+ * runtime, e.g. on CR4 changes, but never advertises to userspace (hence 0).
+ */
+#define OS_F(name) \
+({ \
+ KVM_VALIDATE_CPU_CAP_USAGE(name); \
+ 0; \
+})
+
/*
* Magic value used by KVM when querying userspace-provided CPUID entries and
* doesn't care about the CPIUD index because the index of the function in
@@ -727,6 +748,7 @@ do { \
u32 __leaf = __feature_leaf(X86_FEATURE_##name); \
\
BUILD_BUG_ON(__leaf != kvm_cpu_cap_init_in_progress); \
+ kvm_known_cpu_caps[__leaf] |= feature_bit(name); \
} while (0)

/*
@@ -771,14 +793,14 @@ void kvm_set_cpu_caps(void)
* NOTE: MONITOR (and MWAIT) are emulated as NOP, but *not*
* advertised to guests via CPUID!
*/
- F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64 */ | VMM_F(MWAIT) |
- 0 /* DS-CPL, VMX, SMX, EST */ |
+ F(XMM3) | F(PCLMULQDQ) | VEND_F(DTES64) | VMM_F(MWAIT) |
+ VEND_F(VMX) | 0 /* DS-CPL, SMX, EST */ |
0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
F(FMA) | F(CX16) | 0 /* xTPR Update */ | F(PDCM) |
F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) |
F(XMM4_2) | EMUL_F(X2APIC) | F(MOVBE) | F(POPCNT) |
EMUL_F(TSC_DEADLINE_TIMER) | F(AES) | F(XSAVE) |
- 0 /* OSXSAVE */ | F(AVX) | F(F16C) | F(RDRAND) |
+ OS_F(OSXSAVE) | F(AVX) | F(F16C) | F(RDRAND) |
EMUL_F(HYPERVISOR)
);

@@ -788,7 +810,7 @@ void kvm_set_cpu_caps(void)
F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLUSH) |
- 0 /* Reserved, DS, ACPI */ | F(MMX) |
+ 0 /* Reserved */ | VEND_F(DS) | 0 /* ACPI */ | F(MMX) |
F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
0 /* HTT, TM, Reserved, PBE */
);
@@ -796,17 +818,17 @@ void kvm_set_cpu_caps(void)
kvm_cpu_cap_init(CPUID_7_0_EBX,
F(FSGSBASE) | EMUL_F(TSC_ADJUST) | F(SGX) | F(BMI1) | F(HLE) |
F(AVX2) | F(FDP_EXCPTN_ONLY) | F(SMEP) | F(BMI2) | F(ERMS) |
- F(INVPCID) | F(RTM) | F(ZERO_FCS_FDS) | 0 /*MPX*/ |
+ F(INVPCID) | F(RTM) | F(ZERO_FCS_FDS) | VEND_F(MPX) |
F(AVX512F) | F(AVX512DQ) | F(RDSEED) | F(ADX) | F(SMAP) |
- F(AVX512IFMA) | F(CLFLUSHOPT) | F(CLWB) | 0 /*INTEL_PT*/ |
+ F(AVX512IFMA) | F(CLFLUSHOPT) | F(CLWB) | VEND_F(INTEL_PT) |
F(AVX512PF) | F(AVX512ER) | F(AVX512CD) | F(SHA_NI) |
F(AVX512BW) | F(AVX512VL));

kvm_cpu_cap_init(CPUID_7_ECX,
- F(AVX512VBMI) | RAW_F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) |
+ F(AVX512VBMI) | RAW_F(LA57) | F(PKU) | OS_F(OSPKE) | F(RDPID) |
F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
- F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/ |
+ F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | VEND_F(WAITPKG) |
F(SGX_LC) | F(BUS_LOCK_DETECT)
);

@@ -858,11 +880,11 @@ void kvm_set_cpu_caps(void)
);

kvm_cpu_cap_init(CPUID_8000_0001_ECX,
- F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
+ F(LAHF_LM) | F(CMP_LEGACY) | VEND_F(SVM) | 0 /* ExtApicSpace */ |
F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM) |
- F(TOPOEXT) | 0 /* PERFCTR_CORE */
+ F(TOPOEXT) | VEND_F(PERFCTR_CORE)
);

kvm_cpu_cap_init(CPUID_8000_0001_EDX,
@@ -905,23 +927,22 @@ void kvm_set_cpu_caps(void)
kvm_cpu_cap_set(X86_FEATURE_AMD_SSBD);
if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
kvm_cpu_cap_set(X86_FEATURE_AMD_SSB_NO);
- /*
- * The preference is to use SPEC CTRL MSR instead of the
- * VIRT_SPEC MSR.
- */
- if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) &&
- !boot_cpu_has(X86_FEATURE_AMD_SSBD))
- kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD);

/*
* Hide all SVM features by default, SVM will set the cap bits for
* features it emulates and/or exposes for L1.
*/
- kvm_cpu_cap_init(CPUID_8000_000A_EDX, 0);
+ kvm_cpu_cap_init(CPUID_8000_000A_EDX,
+ VEND_F(VMCBCLEAN) | VEND_F(FLUSHBYASID) | VEND_F(NRIPS) |
+ VEND_F(TSCRATEMSR) | VEND_F(V_VMSAVE_VMLOAD) | VEND_F(LBRV) |
+ VEND_F(PAUSEFILTER) | VEND_F(PFTHRESHOLD) | VEND_F(VGIF) |
+ VEND_F(VNMI) | VEND_F(SVME_ADDR_CHK)
+ );

kvm_cpu_cap_init(CPUID_8000_001F_EAX,
- 0 /* SME */ | 0 /* SEV */ | 0 /* VM_PAGE_FLUSH */ | 0 /* SEV_ES */ |
- F(SME_COHERENT));
+ VEND_F(SME) | VEND_F(SEV) | 0 /* VM_PAGE_FLUSH */ | VEND_F(SEV_ES) |
+ F(SME_COHERENT)
+ );

kvm_cpu_cap_init(CPUID_8000_0021_EAX,
F(NO_NESTED_DATA_BP) | F(LFENCE_RDTSC) | 0 /* SmmPgCfgLock */ |
@@ -977,6 +998,26 @@ EXPORT_SYMBOL_GPL(kvm_set_cpu_caps);
#undef KVM_VALIDATE_CPU_CAP_USAGE
#define KVM_VALIDATE_CPU_CAP_USAGE(name)

+/* Feature numbers recorded by guest_cpu_cap_has(), checked below. */
+extern unsigned int __start___kvm_features[];
+extern unsigned int __stop___kvm_features[];
+
+void kvm_validate_cpu_caps(void)
+{
+ int i;
+
+ for (i = 0; i < __stop___kvm_features - __start___kvm_features; i++) {
+ u32 feature = __feature_translate(__start___kvm_features[i]);
+ u32 leaf = feature / 32; /* capability word; 32 bits per word */
+
+ if (kvm_known_cpu_caps[leaf] & BIT(feature & 31))
+ continue; /* KVM recorded this feature as known */
+
+ pr_warn("Word %u, bit %u (%lx) checked but not supported\n",
+ leaf, feature & 31, BIT(feature & 31));
+ }
+
+}
struct kvm_cpuid_array {
struct kvm_cpuid_entry2 *entries;
int maxnent;
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 0bf3bddd0e29..32a86de980c7 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -10,6 +10,7 @@

extern u32 kvm_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly;
void kvm_set_cpu_caps(void);
+void kvm_validate_cpu_caps(void);

void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu);
void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu);
@@ -245,8 +246,8 @@ static __always_inline void guest_cpu_cap_change(struct kvm_vcpu *vcpu,
guest_cpu_cap_clear(vcpu, x86_feature);
}

-static __always_inline bool guest_cpu_cap_has(struct kvm_vcpu *vcpu,
- unsigned int x86_feature)
+static __always_inline bool __guest_cpu_cap_has(struct kvm_vcpu *vcpu,
+ unsigned int x86_feature)
{
unsigned int x86_leaf = __feature_leaf(x86_feature);

@@ -254,6 +255,17 @@ static __always_inline bool guest_cpu_cap_has(struct kvm_vcpu *vcpu,
return vcpu->arch.cpu_caps[x86_leaf] & __feature_bit(x86_feature);
}

+#define guest_cpu_cap_has(vcpu, x86_feature) \
+({ /* record the feature number in __kvm_features, then do the lookup */ \
+ asm volatile( \
+ " .pushsection \"__kvm_features\",\"a\"\n" \
+ " .balign 4\n" \
+ " .long " __stringify(x86_feature) " \n" \
+ " .popsection\n" \
+ ); \
+ __guest_cpu_cap_has(vcpu, x86_feature); /* value of the whole ({ }) */ \
+})
+
static inline bool kvm_vcpu_is_legal_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
if (guest_cpu_cap_has(vcpu, X86_FEATURE_LAM))
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5aa7581802f7..f6b7c5c862fb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9790,6 +9790,8 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
if (r != 0)
goto out_mmu_exit;

+ kvm_validate_cpu_caps();
+
kvm_ops_update(ops);

for_each_online_cpu(cpu) {
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index f7749d0f2562..102fc2a39083 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -533,6 +533,10 @@
BOUNDED_SECTION_BY(__modver, ___modver) \
} \
\
+ __kvm_features : AT(ADDR(__kvm_features) - LOAD_OFFSET) { \
+ BOUNDED_SECTION_BY(__kvm_features, ___kvm_features) \
+ } \
+ \
KCFI_TRAPS \
\
RO_EXCEPTION_TABLE \
--
2.45.0.215.g3402c0e53f-goog