[PATCH RFC 7/7] x86/kvm/nVMX: optimize MMU switch from nested_vmx_load_cr3()

From: Vitaly Kuznetsov
Date: Fri Jul 20 2018 - 09:26:49 EST


Now we have everything in place to stop doing a full MMU reload when we
switch from L1 to L2 and back: kvm_mmu_reset_context() gains a
'check_if_unchanged' parameter and, when none of the cached MMU-relevant
state (CR0/CR4/EFER bits, SMM, CR3) has changed, it only requests a TLB
flush instead of re-initializing the MMU. Generalize
shadow_ept_mmu_update_needed() into mmu_update_needed(), making it
suitable for kvm_mmu_reset_context().

Signed-off-by: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx>
---
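For context, below is a compact, self-contained sketch of the caching
scheme this patch generalizes. Every structure and function name in it is
a hypothetical stand-in, not KVM code: the point is only to show the
pattern of remembering the paging-related bits the MMU setup depends on
and skipping the expensive re-initialization when nothing relevant
changed, requesting a plain TLB flush instead.

#include <stdbool.h>
#include <stdio.h>

/* Cached copy of the control bits the MMU configuration depends on. */
struct scache {
	unsigned long cr3;
	bool valid;
	bool cr0_wp;
	bool cr4_smep;
	bool efer_lma;
};

/* A toy vCPU: just the bits we care about plus the cache. */
struct vcpu {
	unsigned long cr3;
	bool cr0_wp;
	bool cr4_smep;
	bool efer_lma;
	struct scache scache;
};

/* Compare every tracked bit against the cache, refreshing it as we go. */
static bool mmu_update_needed(struct vcpu *v)
{
	bool res = false;

	if (!v->scache.valid) {
		v->scache.valid = true;
		res = true;
	}
	if (v->scache.cr0_wp != v->cr0_wp) {
		v->scache.cr0_wp = v->cr0_wp;
		res = true;
	}
	if (v->scache.cr4_smep != v->cr4_smep) {
		v->scache.cr4_smep = v->cr4_smep;
		res = true;
	}
	if (v->scache.efer_lma != v->efer_lma) {
		v->scache.efer_lma = v->efer_lma;
		res = true;
	}
	return res;
}

static void mmu_reset_context(struct vcpu *v, bool check_if_unchanged)
{
	if (check_if_unchanged && !mmu_update_needed(v) &&
	    v->scache.cr3 == v->cr3) {
		/* Same configuration, same root: a TLB flush is enough. */
		puts("TLB flush only");
		return;
	}
	if (!check_if_unchanged)
		v->scache.valid = false;	/* force a rebuild next time */

	/* Stand-in for unloading and re-initializing the MMU. */
	puts("full MMU re-initialization");
	v->scache.cr3 = v->cr3;
}

int main(void)
{
	struct vcpu v = { .cr3 = 0x1000, .efer_lma = true };

	mmu_reset_context(&v, true);	/* cache invalid -> full init */
	mmu_reset_context(&v, true);	/* nothing changed -> flush only */
	v.cr4_smep = true;
	mmu_reset_context(&v, true);	/* tracked bit flipped -> full init */
	return 0;
}

With this shape, a caller that knows the transition may be a no-op (such
as nested_vmx_load_cr3() in the hunk below) can pass check_if_unchanged
and fall back to a TLB flush, while callers that definitely invalidate
the MMU configuration keep forcing a full rebuild.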
arch/x86/include/asm/kvm_host.h | 10 +++++-
arch/x86/kvm/cpuid.c | 2 +-
arch/x86/kvm/mmu.c | 74 +++++++++++++++++++++++++++++++----------
arch/x86/kvm/svm.c | 6 ++--
arch/x86/kvm/vmx.c | 7 ++--
arch/x86/kvm/x86.c | 14 ++++----
6 files changed, 81 insertions(+), 32 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fa73cf13c4d0..63ad28c40c1d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -327,15 +327,22 @@ struct rsvd_bits_validate {

/* Source data used to setup MMU */
struct kvm_mmu_sdata_cache {
+ unsigned long cr3;
+
unsigned int valid:1;
+ unsigned int smm:1;
unsigned int ept_ad:1;
unsigned int execonly:1;
+ unsigned int cr0_pg:1;
unsigned int cr0_wp:1;
unsigned int cr4_pae:1;
unsigned int cr4_pse:1;
unsigned int cr4_pke:1;
unsigned int cr4_smap:1;
unsigned int cr4_smep:1;
+ unsigned int cr4_la57:1;
+ unsigned int efer_lma:1;
+ unsigned int efer_nx:1;
};

/*
@@ -1149,7 +1156,8 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask,
u64 acc_track_mask, u64 me_mask);

-void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
+void kvm_mmu_reset_context(struct kvm_vcpu *vcpu, bool check_if_unchanged);
+
void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
struct kvm_memory_slot *memslot);
void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 7e042e3d47fd..b0efd08075d8 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -142,7 +142,7 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)

/* Update physical-address width */
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
- kvm_mmu_reset_context(vcpu);
+ kvm_mmu_reset_context(vcpu, false);

kvm_pmu_refresh(vcpu);
return 0;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index eed1773453cd..9c08ee2e517a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4470,10 +4470,8 @@ static void paging32E_init_context(struct kvm_vcpu *vcpu,
paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL);
}

-static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
+static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
{
- struct kvm_mmu *context = vcpu->arch.mmu;
-
context->base_role.word = 0;
context->base_role.guest_mode = is_guest_mode(vcpu);
context->base_role.smm = is_smm(vcpu);
@@ -4548,21 +4546,30 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);

-static inline bool shadow_ept_mmu_update_needed(struct kvm_vcpu *vcpu,
- bool execonly, bool accessed_dirty)
+static inline bool mmu_update_needed(struct kvm_vcpu *vcpu,
+ struct kvm_mmu *context,
+ bool execonly, bool accessed_dirty)
{
- struct kvm_mmu *context = vcpu->arch.mmu;
bool cr4_smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP) != 0;
bool cr4_smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP) != 0;
bool cr4_pke = kvm_read_cr4_bits(vcpu, X86_CR4_PKE) != 0;
- bool cr0_wp = is_write_protection(vcpu);
+ bool cr4_la57 = kvm_read_cr4_bits(vcpu, X86_CR4_LA57) != 0;
bool cr4_pse = is_pse(vcpu);
+ bool cr0_wp = is_write_protection(vcpu);
+ bool cr0_pg = is_paging(vcpu);
+ bool efer_nx = is_nx(vcpu);
+ bool efer_lma = is_long_mode(vcpu);
+ bool smm = is_smm(vcpu);
bool res = false;

if (!context->scache.valid) {
res = true;
context->scache.valid = 1;
}
+ if (context->scache.smm != smm) {
+ context->scache.smm = smm;
+ res = true;
+ }
if (context->scache.ept_ad != accessed_dirty) {
context->scache.ept_ad = accessed_dirty;
res = true;
@@ -4587,10 +4594,26 @@ static inline bool shadow_ept_mmu_update_needed(struct kvm_vcpu *vcpu,
res = true;
context->scache.cr4_pke = cr4_pke;
}
+ if (context->scache.cr4_la57 != cr4_la57) {
+ res = true;
+ context->scache.cr4_la57 = cr4_la57;
+ }
if (context->scache.cr0_wp != cr0_wp) {
res = true;
context->scache.cr0_wp = cr0_wp;
}
+ if (context->scache.cr0_pg != cr0_pg) {
+ res = true;
+ context->scache.cr0_pg = cr0_pg;
+ }
+ if (context->scache.efer_nx != efer_nx) {
+ res = true;
+ context->scache.efer_nx = efer_nx;
+ }
+ if (context->scache.efer_lma != efer_lma) {
+ res = true;
+ context->scache.efer_lma = efer_lma;
+ }

return res;
}
@@ -4600,7 +4623,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
{
struct kvm_mmu *context = vcpu->arch.mmu;

- if (!shadow_ept_mmu_update_needed(vcpu, execonly, accessed_dirty))
+ if (!mmu_update_needed(vcpu, context, execonly, accessed_dirty))
return;

context->shadow_root_level = PT64_ROOT_4LEVEL;
@@ -4627,10 +4650,8 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
}
EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu);

-static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
+static void init_kvm_softmmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
{
- struct kvm_mmu *context = vcpu->arch.mmu;
-
kvm_init_shadow_mmu(vcpu);
context->set_cr3 = kvm_x86_ops->set_cr3;
context->get_cr3 = get_cr3;
@@ -4638,10 +4659,9 @@ static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
context->inject_page_fault = kvm_inject_page_fault;
}

-static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
+static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu,
+ struct kvm_mmu *g_context)
{
- struct kvm_mmu *g_context = &vcpu->arch.nested_mmu;
-
g_context->get_cr3 = get_cr3;
g_context->get_pdptr = kvm_pdptr_read;
g_context->inject_page_fault = kvm_inject_page_fault;
@@ -4681,16 +4701,34 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
update_last_nonleaf_level(vcpu, g_context);
}

-void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
+void kvm_mmu_reset_context(struct kvm_vcpu *vcpu, bool check_if_unchanged)
{
+ struct kvm_mmu *context = mmu_is_nested(vcpu) ?
+ &vcpu->arch.nested_mmu : vcpu->arch.mmu;
+
+ if (check_if_unchanged && !mmu_update_needed(vcpu, context, 0, 0) &&
+ context->scache.cr3 == vcpu->arch.mmu->get_cr3(vcpu)) {
+ /*
+ * Nothing changed but TLB should always be flushed, e.g. when
+ * we switch between L1 and L2.
+ */
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ return;
+ } else if (!check_if_unchanged) {
+ context->scache.valid = 0;
+ }
+
kvm_mmu_unload(vcpu);

if (mmu_is_nested(vcpu))
- init_kvm_nested_mmu(vcpu);
+ init_kvm_nested_mmu(vcpu, context);
else if (tdp_enabled)
- init_kvm_tdp_mmu(vcpu);
+ init_kvm_tdp_mmu(vcpu, context);
else
- init_kvm_softmmu(vcpu);
+ init_kvm_softmmu(vcpu, context);
+
+ if (check_if_unchanged)
+ context->scache.cr3 = vcpu->arch.mmu->get_cr3(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_mmu_reset_context);

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 3b3b9839c2b5..6c1db96971c0 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1574,7 +1574,7 @@ static void init_vmcb(struct vcpu_svm *svm)
* It also updates the guest-visible cr0 value.
*/
svm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
- kvm_mmu_reset_context(&svm->vcpu);
+ kvm_mmu_reset_context(&svm->vcpu, false);

save->cr4 = X86_CR4_PAE;
/* rdx = ?? */
@@ -3380,7 +3380,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
nested_svm_unmap(page);

nested_svm_uninit_mmu_context(&svm->vcpu);
- kvm_mmu_reset_context(&svm->vcpu);
+ kvm_mmu_reset_context(&svm->vcpu, false);
kvm_mmu_load(&svm->vcpu);

return 0;
@@ -3466,7 +3466,7 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
(void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);

/* Guest paging mode is active - reset mmu */
- kvm_mmu_reset_context(&svm->vcpu);
+ kvm_mmu_reset_context(&svm->vcpu, false);

svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3467665a75d5..a85ed004a4ba 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4696,7 +4696,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);

- kvm_mmu_reset_context(vcpu);
+ kvm_mmu_reset_context(vcpu, false);
}

static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
@@ -11123,6 +11123,8 @@ static bool nested_cr3_valid(struct kvm_vcpu *vcpu, unsigned long val)
static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool nested_ept,
u32 *entry_failure_code)
{
+ bool mmu_reset_force = false;
+
if (cr3 != kvm_read_cr3(vcpu) || (!nested_ept && pdptrs_changed(vcpu))) {
if (!nested_cr3_valid(vcpu, cr3)) {
*entry_failure_code = ENTRY_FAIL_DEFAULT;
@@ -11135,6 +11137,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne
*/
if (!is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu) &&
!nested_ept) {
+ mmu_reset_force = true;
if (!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) {
*entry_failure_code = ENTRY_FAIL_PDPTE;
return 1;
@@ -11145,7 +11148,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne
__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
}

- kvm_mmu_reset_context(vcpu);
+ kvm_mmu_reset_context(vcpu, !mmu_reset_force);
return 0;
}

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5510a7f50195..3288a7e303ec 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -695,7 +695,7 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
}

if ((cr0 ^ old_cr0) & update_bits)
- kvm_mmu_reset_context(vcpu);
+ kvm_mmu_reset_context(vcpu, false);

if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
@@ -836,7 +836,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)

if (((cr4 ^ old_cr4) & pdptr_bits) ||
(!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
- kvm_mmu_reset_context(vcpu);
+ kvm_mmu_reset_context(vcpu, false);

if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE))
kvm_update_cpuid(vcpu);
@@ -1162,7 +1162,7 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer)

/* Update reserved bits */
if ((efer ^ old_efer) & EFER_NX)
- kvm_mmu_reset_context(vcpu);
+ kvm_mmu_reset_context(vcpu, false);

return 0;
}
@@ -5898,7 +5898,7 @@ static void kvm_smm_changed(struct kvm_vcpu *vcpu)
kvm_make_request(KVM_REQ_EVENT, vcpu);
}

- kvm_mmu_reset_context(vcpu);
+ kvm_mmu_reset_context(vcpu, false);
}

static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags)
@@ -7156,7 +7156,7 @@ static void enter_smm(struct kvm_vcpu *vcpu)
kvm_x86_ops->set_efer(vcpu, 0);

kvm_update_cpuid(vcpu);
- kvm_mmu_reset_context(vcpu);
+ kvm_mmu_reset_context(vcpu, false);
}

static void process_smi(struct kvm_vcpu *vcpu)
@@ -8058,7 +8058,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
srcu_read_unlock(&vcpu->kvm->srcu, idx);

if (mmu_reset_needed)
- kvm_mmu_reset_context(vcpu);
+ kvm_mmu_reset_context(vcpu, false);

max_bits = KVM_NR_INTERRUPTS;
pending_vec = find_first_bit(
@@ -8333,7 +8333,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
kvm_vcpu_mtrr_init(vcpu);
vcpu_load(vcpu);
kvm_vcpu_reset(vcpu, false);
- kvm_mmu_reset_context(vcpu);
+ kvm_mmu_reset_context(vcpu, false);
vcpu_put(vcpu);
return 0;
}
--
2.14.4