[RFC PATCH 17/73] KVM: x86/PVM: Implement module initialization related callbacks

From: Lai Jiangshan
Date: Mon Feb 26 2024 - 09:42:12 EST


From: Lai Jiangshan <jiangshan.ljs@xxxxxxxxxxxx>

Implement hardware enable/disable and setup/unsetup callbacks for PVM
module initialization.

Signed-off-by: Lai Jiangshan <jiangshan.ljs@xxxxxxxxxxxx>
Signed-off-by: Hou Wenlong <houwenlong.hwl@xxxxxxxxxxxx>
---
arch/x86/kvm/pvm/pvm.c | 226 +++++++++++++++++++++++++++++++++++++++++
arch/x86/kvm/pvm/pvm.h | 20 ++++
2 files changed, 246 insertions(+)

diff --git a/arch/x86/kvm/pvm/pvm.c b/arch/x86/kvm/pvm/pvm.c
index 1dfa1ae57c8c..83aa2c9f42f6 100644
--- a/arch/x86/kvm/pvm/pvm.c
+++ b/arch/x86/kvm/pvm/pvm.c
@@ -9,18 +9,244 @@
* the COPYING file in the top-level directory.
*
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>

+#include <asm/pvm_para.h>
+
+#include "cpuid.h"
+#include "x86.h"
+#include "pvm.h"
+
MODULE_AUTHOR("AntGroup");
MODULE_LICENSE("GPL");

+/* Set in pvm_init(): true when the boot CPU's vendor is X86_VENDOR_INTEL. */
+static bool __read_mostly is_intel;
+
+/* Address of the host IDT, captured with store_idt() in hardware_setup(). */
+static unsigned long host_idt_base;
+
+/* .setup_mce callback of pvm_x86_ops; currently a no-op. */
+static void pvm_setup_mce(struct kvm_vcpu *vcpu)
+{
+}
+
+/*
+ * .has_emulated_msr callback: report whether the MSR @index is treated as
+ * emulated for PVM guests. Every MSR is reported as emulated (true) except
+ * the ones explicitly listed below. @kvm is not used.
+ */
+static bool pvm_has_emulated_msr(struct kvm *kvm, u32 index)
+{
+ switch (index) {
+ /* MCG_EXT_CTL and the emulated VMX MSR range are not provided. */
+ case MSR_IA32_MCG_EXT_CTL:
+ case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
+ /* These two are AMD SVM only. */
+ case MSR_AMD64_VIRT_SPEC_CTRL:
+ case MSR_AMD64_TSC_RATIO:
+ /* Currently we only run the guest in long mode. */
+ case MSR_IA32_SMBASE:
+ return false;
+ default:
+ return true;
+ }
+}
+
+/* .has_wbinvd_exit callback of pvm_x86_ops; unconditionally reports true. */
+static bool cpu_has_pvm_wbinvd_exit(void)
+{
+ return true;
+}
+
+/* .hardware_enable callback of pvm_x86_ops; always succeeds (returns 0). */
+static int hardware_enable(void)
+{
+ /* Nothing to do */
+ return 0;
+}
+
+/* .hardware_disable callback of pvm_x86_ops; counterpart of hardware_enable(). */
+static void hardware_disable(void)
+{
+ /* Nothing to do */
+}
+
+/*
+ * .check_processor_compatibility callback of pvm_x86_ops; performs no check
+ * and always returns 0.
+ */
+static int pvm_check_processor_compat(void)
+{
+ /* Nothing to do */
+ return 0;
+}
+
+/*
+ * Set up the EFER bits and CPU capabilities advertised for PVM guests.
+ * Called once from hardware_setup(). The common kvm_set_cpu_caps() runs
+ * first and the PVM-specific adjustments below are applied on top of it.
+ */
+static __init void pvm_set_cpu_caps(void)
+{
+ if (boot_cpu_has(X86_FEATURE_NX))
+ kvm_enable_efer_bits(EFER_NX);
+ if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
+ kvm_enable_efer_bits(EFER_FFXSR);
+
+ kvm_set_cpu_caps();
+
+ /* Unloading kvm-intel.ko doesn't clean up kvm_caps.supported_mce_cap. */
+ kvm_caps.supported_mce_cap = MCG_CTL_P | MCG_SER_P;
+
+ kvm_caps.supported_xss = 0;
+
+ /* PVM supervisor mode runs on hardware ring3, so no xsaves. */
+ kvm_cpu_cap_clear(X86_FEATURE_XSAVES);
+
+ /*
+ * PVM supervisor mode runs on hardware ring3, so SMEP and SMAP cannot
+ * be supported directly through hardware. But they can be emulated
+ * through other hardware features when needed.
+ */
+
+ /*
+ * PVM doesn't support SMAP, but a similar protection might be
+ * emulated via PKU in the future.
+ */
+ kvm_cpu_cap_clear(X86_FEATURE_SMAP);
+
+ /*
+ * PVM doesn't support SMEP. When NX is supported, the guest can use
+ * NX on the user pagetable to emulate the same protection as SMEP.
+ */
+ kvm_cpu_cap_clear(X86_FEATURE_SMEP);
+
+ /*
+ * Unlike VMX/SVM, which can switch paging mode atomically, PVM
+ * implements guest LA57 through host LA57 shadow paging.
+ */
+ if (!pgtable_l5_enabled())
+ kvm_cpu_cap_clear(X86_FEATURE_LA57);
+
+ /*
+ * Even if host PCID is not enabled, guest PCID can be enabled to reduce
+ * the heavy guest TLB flushing. Guest CR4.PCIDE is not directly
+ * mapped to the hardware and is virtualized by PVM so that it can be
+ * enabled unconditionally.
+ */
+ kvm_cpu_cap_set(X86_FEATURE_PCID);
+
+ /* Don't expose MSR_IA32_SPEC_CTRL to guest */
+ kvm_cpu_cap_clear(X86_FEATURE_SPEC_CTRL);
+ kvm_cpu_cap_clear(X86_FEATURE_AMD_STIBP);
+ kvm_cpu_cap_clear(X86_FEATURE_AMD_IBRS);
+ kvm_cpu_cap_clear(X86_FEATURE_AMD_SSBD);
+
+ /* PVM hypervisor hasn't implemented LAM so far */
+ kvm_cpu_cap_clear(X86_FEATURE_LAM);
+
+ /* Don't expose MSR_IA32_DEBUGCTLMSR related features. */
+ kvm_cpu_cap_clear(X86_FEATURE_BUS_LOCK_DETECT);
+}
+
+/*
+ * .hardware_setup callback of pvm_init_ops: one-time setup done while the
+ * module is being initialized. Always returns 0.
+ */
+static __init int hardware_setup(void)
+{
+ struct desc_ptr dt;
+
+ /* Remember where the host IDT lives. */
+ store_idt(&dt);
+ host_idt_base = dt.address;
+
+ pvm_set_cpu_caps();
+
+ /*
+ * NOTE(review): the first argument presumably disables TDP, which
+ * would match the shadow paging mentioned in pvm_set_cpu_caps() --
+ * confirm against kvm_configure_mmu().
+ */
+ kvm_configure_mmu(false, 0, 0, 0);
+
+ /* Clear the enable_apicv flag for PVM. */
+ enable_apicv = 0;
+
+ return 0;
+}
+
+/* .hardware_unsetup callback of pvm_x86_ops; currently a no-op. */
+static void hardware_unsetup(void)
+{
+}
+
+/* Nested ops referenced by pvm_x86_ops.nested_ops; no callbacks are set. */
+struct kvm_x86_nested_ops pvm_nested_ops = {};
+
+/*
+ * Runtime callbacks passed to the common KVM x86 code through
+ * pvm_init_ops.runtime_ops.
+ * NOTE(review): this table is __initdata, so it is presumably copied during
+ * kvm_x86_vendor_init() rather than referenced afterwards -- confirm.
+ */
+static struct kvm_x86_ops pvm_x86_ops __initdata = {
+ .name = KBUILD_MODNAME,
+
+ .check_processor_compatibility = pvm_check_processor_compat,
+
+ .hardware_unsetup = hardware_unsetup,
+ .hardware_enable = hardware_enable,
+ .hardware_disable = hardware_disable,
+ .has_emulated_msr = pvm_has_emulated_msr,
+
+ .has_wbinvd_exit = cpu_has_pvm_wbinvd_exit,
+
+ .nested_ops = &pvm_nested_ops,
+
+ .setup_mce = pvm_setup_mce,
+};
+
+/*
+ * Init-time ops passed to kvm_x86_vendor_init() by pvm_init(): the one-time
+ * hardware_setup() hook plus a pointer to the runtime callback table.
+ */
+static struct kvm_x86_init_ops pvm_init_ops __initdata = {
+ .hardware_setup = hardware_setup,
+
+ .runtime_ops = &pvm_x86_ops,
+};
+
+/*
+ * Module exit: tear down what pvm_init() set up, in reverse order
+ * (kvm_exit(), kvm_x86_vendor_exit(), host_mmu_destroy()), then reset the
+ * two globals that pvm_init() assigned.
+ */
static void pvm_exit(void)
{
+ kvm_exit();
+ kvm_x86_vendor_exit();
+ host_mmu_destroy();
+ allow_smaller_maxphyaddr = false;
+ kvm_cpuid_vendor_signature = 0;
}
module_exit(pvm_exit);

+/*
+ * Check the host CPU features PVM depends on. The first failing check logs
+ * a warning and makes the function return -EOPNOTSUPP; 0 is returned only
+ * when every check passes.
+ */
+static int __init hardware_cap_check(void)
+{
+ int ret = -EOPNOTSUPP;
+
+ /*
+ * The switcher can't be used when KPTI is enabled. See the comments
+ * above SWITCHER_SAVE_AND_SWITCH_TO_HOST_CR3.
+ */
+ if (boot_cpu_has(X86_FEATURE_PTI))
+ pr_warn("Support for host KPTI is not included yet.\n");
+ else if (!boot_cpu_has(X86_FEATURE_FSGSBASE))
+ pr_warn("FSGSBASE is required per PVM specification.\n");
+ else if (!boot_cpu_has(X86_FEATURE_RDTSCP))
+ pr_warn("RDTSCP is required to support for getcpu in guest vdso.\n");
+ else if (!boot_cpu_has(X86_FEATURE_CX16))
+ pr_warn("CMPXCHG16B is required for guest.\n");
+ else
+ ret = 0;
+
+ return ret;
+}
+
static int __init pvm_init(void)
{
+ int r;
+
+ r = hardware_cap_check();
+ if (r)
+ return r;
+
+ r = host_mmu_init();
+ if (r)
+ return r;
+
+ is_intel = boot_cpu_data.x86_vendor == X86_VENDOR_INTEL;
+
+ r = kvm_x86_vendor_init(&pvm_init_ops);
+ if (r)
+ goto exit_host_mmu;
+
+ r = kvm_init(sizeof(struct vcpu_pvm), __alignof__(struct vcpu_pvm), THIS_MODULE);
+ if (r)
+ goto exit_vendor;
+
+ allow_smaller_maxphyaddr = true;
+ kvm_cpuid_vendor_signature = PVM_CPUID_SIGNATURE;
+
return 0;
+
+exit_vendor:
+ kvm_x86_vendor_exit();
+exit_host_mmu:
+ host_mmu_destroy();
+ return r;
}
module_init(pvm_init);
diff --git a/arch/x86/kvm/pvm/pvm.h b/arch/x86/kvm/pvm/pvm.h
index 7a3732986a6d..6149cf5975a4 100644
--- a/arch/x86/kvm/pvm/pvm.h
+++ b/arch/x86/kvm/pvm/pvm.h
@@ -2,6 +2,8 @@
#ifndef __KVM_X86_PVM_H
#define __KVM_X86_PVM_H

+#include <linux/kvm_host.h>
+
#define PT_L4_SHIFT 39
#define PT_L4_SIZE (1UL << PT_L4_SHIFT)
#define DEFAULT_RANGE_L4_SIZE (32 * PT_L4_SIZE)
@@ -20,4 +22,22 @@ extern u64 *host_mmu_root_pgd;
void host_mmu_destroy(void);
int host_mmu_init(void);

+/*
+ * PVM per-vCPU state. Wraps the generic struct kvm_vcpu; use to_pvm() to get
+ * from the embedded kvm_vcpu back to this container.
+ */
+struct vcpu_pvm {
+ struct kvm_vcpu vcpu;
+};
+
+/*
+ * PVM per-VM state. Wraps the generic struct kvm; use to_kvm_pvm() to get
+ * from the embedded kvm back to this container.
+ */
+struct kvm_pvm {
+ struct kvm kvm;
+};
+
+/* Return the struct kvm_pvm that embeds @kvm (container_of on member kvm). */
+static __always_inline struct kvm_pvm *to_kvm_pvm(struct kvm *kvm)
+{
+ return container_of(kvm, struct kvm_pvm, kvm);
+}
+
+/* Return the struct vcpu_pvm that embeds @vcpu (container_of on member vcpu). */
+static __always_inline struct vcpu_pvm *to_pvm(struct kvm_vcpu *vcpu)
+{
+ return container_of(vcpu, struct vcpu_pvm, vcpu);
+}
+
#endif /* __KVM_X86_PVM_H */
--
2.19.1.6.gb485710b