[PATCH v1 1/3] KVM: x86: Convert TDP level calculation to vendor-specific code

From: Wei Huang
Date: Thu Aug 05 2021 - 16:55:28 EST


Currently the TDP level for an x86 vCPU is calculated by checking both
MAXPHYADDR and max_tdp_level. This design assumes that all x86 CPUs are
free to use a nested page table level different from that of the host
CPU. That assumption does not always hold: on AMD, for instance, the
NPT level has to be consistent with the host's paging level. To solve
this problem, add a vendor-specific kvm_x86_ops callback for the TDP
level calculation.

Signed-off-by: Wei Huang <wei.huang2@xxxxxxx>
---
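Note: for illustration, the per-vCPU policy that this patch moves into
the vendor callbacks can be modeled in standalone userspace C. This is
only a sketch, not kernel code: the flattened signatures and the main()
harness are made up for the example, but the decision logic mirrors the
hunks below.

#include <stdbool.h>
#include <stdio.h>

/* Model of vmx_get_tdp_level(): use 5-level EPT only when the CPU
 * supports it and the guest's MAXPHYADDR actually needs it. */
static int vmx_tdp_level(bool has_5level_ept, int guest_maxphyaddr)
{
        if (has_5level_ept && guest_maxphyaddr > 48)
                return 5;
        return 4;
}

/* Model of svm_get_npt_level(): with this patch, a fixed 4-level NPT
 * on 64-bit hosts (PT64_ROOT_4LEVEL). */
static int svm_npt_level(void)
{
        return 4;
}

int main(void)
{
        printf("VMX, 5-level EPT, MAXPHYADDR=52 -> %d\n",
               vmx_tdp_level(true, 52));   /* prints 5 */
        printf("VMX, 5-level EPT, MAXPHYADDR=48 -> %d\n",
               vmx_tdp_level(true, 48));   /* prints 4 */
        printf("SVM                             -> %d\n",
               svm_npt_level());           /* prints 4 */
        return 0;
}

The point of the split is that common MMU code no longer bakes in the
"cap 5-level to 4-level when MAXPHYADDR <= 48" heuristic; each vendor
applies (or skips) it as its hardware requires.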
arch/x86/include/asm/kvm-x86-ops.h |  1 +
arch/x86/include/asm/kvm_host.h    |  5 ++---
arch/x86/kvm/mmu/mmu.c             | 22 +++++-----------------
arch/x86/kvm/svm/svm.c             |  5 +++--
arch/x86/kvm/vmx/vmx.c             |  7 ++++---
5 files changed, 15 insertions(+), 25 deletions(-)
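
For context (existing machinery, not part of this diff): the
KVM_X86_OP() entry added to kvm-x86-ops.h below is what wires the new
hook into x86's static-call plumbing, which is why common MMU code can
invoke it as static_call(kvm_x86_get_tdp_level)(vcpu). Roughly, and
abridged (the exact expansion lives in kvm_host.h):

/* kvm_host.h declares one static call per kvm_x86_ops member... */
#define KVM_X86_OP(func) \
        DECLARE_STATIC_CALL(kvm_x86_##func, *(((struct kvm_x86_ops *)0)->func))
#include <asm/kvm-x86-ops.h>

/* ...and at hardware setup the macro is redefined so each static call
 * is retargeted at the vendor implementation, e.g. vmx_get_tdp_level()
 * or svm_get_npt_level() for the new hook: */
#define KVM_X86_OP(func) \
        static_call_update(kvm_x86_##func, kvm_x86_ops.func)
#include <asm/kvm-x86-ops.h>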

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index a12a4987154e..9853a7c9e4b7 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -85,6 +85,7 @@ KVM_X86_OP_NULL(sync_pir_to_irr)
KVM_X86_OP(set_tss_addr)
KVM_X86_OP(set_identity_map_addr)
KVM_X86_OP(get_mt_mask)
+KVM_X86_OP(get_tdp_level)
KVM_X86_OP(load_mmu_pgd)
KVM_X86_OP_NULL(has_wbinvd_exit)
KVM_X86_OP(get_l2_tsc_offset)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 974cbfb1eefe..20ddfbac966e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -723,7 +723,6 @@ struct kvm_vcpu_arch {

u64 reserved_gpa_bits;
int maxphyaddr;
- int max_tdp_level;

/* emulate context */

@@ -1365,6 +1364,7 @@ struct kvm_x86_ops {
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr);
u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
+ int (*get_tdp_level)(struct kvm_vcpu *vcpu);

void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, hpa_t root_hpa,
int root_level);
@@ -1747,8 +1747,7 @@ void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd);

-void kvm_configure_mmu(bool enable_tdp, int tdp_max_root_level,
- int tdp_huge_page_level);
+void kvm_configure_mmu(bool enable_tdp, int tdp_huge_page_level);

static inline u16 kvm_read_ldt(void)
{
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 66f7f5bc3482..44e4561e41f5 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -97,7 +97,6 @@ module_param_named(flush_on_reuse, force_flush_and_sync_on_reuse, bool, 0644);
bool tdp_enabled = false;

static int max_huge_page_level __read_mostly;
-static int max_tdp_level __read_mostly;

enum {
AUDIT_PRE_PAGE_FAULT,
@@ -4560,15 +4559,6 @@ static union kvm_mmu_role kvm_calc_mmu_role_common(struct kvm_vcpu *vcpu,
return role;
}

-static inline int kvm_mmu_get_tdp_level(struct kvm_vcpu *vcpu)
-{
- /* Use 5-level TDP if and only if it's useful/necessary. */
- if (max_tdp_level == 5 && cpuid_maxphyaddr(vcpu) <= 48)
- return 4;
-
- return max_tdp_level;
-}
-
static union kvm_mmu_role
kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu,
struct kvm_mmu_role_regs *regs, bool base_only)
@@ -4576,7 +4566,7 @@ kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu,
union kvm_mmu_role role = kvm_calc_mmu_role_common(vcpu, regs, base_only);

role.base.ad_disabled = (shadow_accessed_mask == 0);
- role.base.level = kvm_mmu_get_tdp_level(vcpu);
+ role.base.level = static_call(kvm_x86_get_tdp_level)(vcpu);
role.base.direct = true;
role.base.gpte_is_8_bytes = true;

@@ -4597,7 +4587,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
context->page_fault = kvm_tdp_page_fault;
context->sync_page = nonpaging_sync_page;
context->invlpg = NULL;
- context->shadow_root_level = kvm_mmu_get_tdp_level(vcpu);
+ context->shadow_root_level = static_call(kvm_x86_get_tdp_level)(vcpu);
context->direct_map = true;
context->get_guest_pgd = get_cr3;
context->get_pdptr = kvm_pdptr_read;
@@ -4688,7 +4678,7 @@ kvm_calc_shadow_npt_root_page_role(struct kvm_vcpu *vcpu,
kvm_calc_shadow_root_page_role_common(vcpu, regs, false);

role.base.direct = false;
- role.base.level = kvm_mmu_get_tdp_level(vcpu);
+ role.base.level = static_call(kvm_x86_get_tdp_level)(vcpu);

return role;
}
@@ -5253,11 +5243,9 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
*/
}

-void kvm_configure_mmu(bool enable_tdp, int tdp_max_root_level,
- int tdp_huge_page_level)
+void kvm_configure_mmu(bool enable_tdp, int tdp_huge_page_level)
{
tdp_enabled = enable_tdp;
- max_tdp_level = tdp_max_root_level;

/*
* max_huge_page_level reflects KVM's MMU capabilities irrespective
@@ -5356,7 +5344,7 @@ static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
* other exception is for shadowing L1's 32-bit or PAE NPT on 64-bit
* KVM; that horror is handled on-demand by mmu_alloc_shadow_roots().
*/
- if (tdp_enabled && kvm_mmu_get_tdp_level(vcpu) > PT32E_ROOT_LEVEL)
+ if (tdp_enabled && static_call(kvm_x86_get_tdp_level)(vcpu) > PT32E_ROOT_LEVEL)
return 0;

page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_DMA32);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index e8ccab50ebf6..04710e10d04a 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -258,7 +258,7 @@ u32 svm_msrpm_offset(u32 msr)

#define MAX_INST_SIZE 15

-static int get_max_npt_level(void)
+static int svm_get_npt_level(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_X86_64
return PT64_ROOT_4LEVEL;
@@ -1015,7 +1015,7 @@ static __init int svm_hardware_setup(void)
if (!boot_cpu_has(X86_FEATURE_NPT))
npt_enabled = false;

- kvm_configure_mmu(npt_enabled, get_max_npt_level(), PG_LEVEL_1G);
+ kvm_configure_mmu(npt_enabled, PG_LEVEL_1G);
pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis");

/* Note, SEV setup consumes npt_enabled. */
@@ -4619,6 +4619,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.set_tss_addr = svm_set_tss_addr,
.set_identity_map_addr = svm_set_identity_map_addr,
.get_mt_mask = svm_get_mt_mask,
+ .get_tdp_level = svm_get_npt_level,

.get_exit_info = svm_get_exit_info,

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 927a552393b9..419cea586646 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -3062,9 +3062,9 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
vmx->emulation_required = emulation_required(vcpu);
}

-static int vmx_get_max_tdp_level(void)
+static int vmx_get_tdp_level(struct kvm_vcpu *vcpu)
{
- if (cpu_has_vmx_ept_5levels())
+ if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48))
return 5;
return 4;
}
@@ -7613,6 +7613,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.set_tss_addr = vmx_set_tss_addr,
.set_identity_map_addr = vmx_set_identity_map_addr,
.get_mt_mask = vmx_get_mt_mask,
+ .get_tdp_level = vmx_get_tdp_level,

.get_exit_info = vmx_get_exit_info,

@@ -7803,7 +7804,7 @@ static __init int hardware_setup(void)
ept_lpage_level = PG_LEVEL_2M;
else
ept_lpage_level = PG_LEVEL_4K;
- kvm_configure_mmu(enable_ept, vmx_get_max_tdp_level(), ept_lpage_level);
+ kvm_configure_mmu(enable_ept, ept_lpage_level);

/*
* Only enable PML when hardware supports PML feature, and both EPT
--
2.31.1