[RFC PATCH] kvm/x86: Keep root hpa in prev_roots as much as possible

From: Lai Jiangshan
Date: Wed May 26 2021 - 03:24:20 EST


From: Lai Jiangshan <laijs@xxxxxxxxxxxxxxxxx>

Page table roots in prev_roots[] are likely to be reused soon, and
with the newly introduced need_sync field there is not much overhead
in keeping them.

With the help of the new need_sync field, page table roots are
kept as much as possible, and they are re-synced before being reused
instead of being dropped.

Signed-off-by: Lai Jiangshan <laijs@xxxxxxxxxxxxxxxxx>
---

This patch is just an RFC.
Is the idea OK?
If the idea is OK, we need to reuse one bit from pgd or hpa
as need_sync to save memory. Which one is better?

arch/x86/include/asm/kvm_host.h | 3 ++-
arch/x86/kvm/mmu/mmu.c | 6 ++++++
arch/x86/kvm/vmx/nested.c | 12 ++++--------
arch/x86/kvm/x86.c | 9 +++++----
4 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 55efbacfc244..19a337cf7aa6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -354,10 +354,11 @@ struct rsvd_bits_validate {
struct kvm_mmu_root_info {
gpa_t pgd;
hpa_t hpa;
+ bool need_sync;
};

#define KVM_MMU_ROOT_INFO_INVALID \
- ((struct kvm_mmu_root_info) { .pgd = INVALID_PAGE, .hpa = INVALID_PAGE })
+ ((struct kvm_mmu_root_info) { .pgd = INVALID_PAGE, .hpa = INVALID_PAGE, .need_sync = true})

#define KVM_MMU_NUM_PREV_ROOTS 3

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 5e60b00e8e50..147827135549 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3878,6 +3878,7 @@ static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_pgd,

root.pgd = mmu->root_pgd;
root.hpa = mmu->root_hpa;
+ root.need_sync = false;

if (is_root_usable(&root, new_pgd, new_role))
return true;
@@ -3892,6 +3893,11 @@ static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_pgd,
mmu->root_hpa = root.hpa;
mmu->root_pgd = root.pgd;

+ if (i < KVM_MMU_NUM_PREV_ROOTS && root.need_sync) {
+ kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
+ kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+ }
+
return i < KVM_MMU_NUM_PREV_ROOTS;
}

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 6058a65a6ede..ab7069ac6dc5 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -5312,7 +5312,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 vmx_instruction_info, types;
- unsigned long type, roots_to_free;
+ unsigned long type;
struct kvm_mmu *mmu;
gva_t gva;
struct x86_exception e;
@@ -5361,29 +5361,25 @@ static int handle_invept(struct kvm_vcpu *vcpu)
return nested_vmx_fail(vcpu,
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);

- roots_to_free = 0;
if (nested_ept_root_matches(mmu->root_hpa, mmu->root_pgd,
operand.eptp))
- roots_to_free |= KVM_MMU_ROOT_CURRENT;
+ kvm_mmu_free_roots(vcpu, mmu, KVM_MMU_ROOT_CURRENT);

for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
if (nested_ept_root_matches(mmu->prev_roots[i].hpa,
mmu->prev_roots[i].pgd,
operand.eptp))
- roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
+ mmu->prev_roots[i].need_sync = true;
}
break;
case VMX_EPT_EXTENT_GLOBAL:
- roots_to_free = KVM_MMU_ROOTS_ALL;
+ kvm_mmu_free_roots(vcpu, mmu, KVM_MMU_ROOTS_ALL);
break;
default:
BUG();
break;
}

- if (roots_to_free)
- kvm_mmu_free_roots(vcpu, mmu, roots_to_free);
-
return nested_vmx_succeed(vcpu);
}

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bbc4e04e67ad..1f5617ec6b34 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -11680,7 +11680,6 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
bool pcid_enabled;
struct x86_exception e;
unsigned i;
- unsigned long roots_to_free = 0;
struct {
u64 pcid;
u64 gla;
@@ -11722,9 +11721,8 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
if (kvm_get_pcid(vcpu, vcpu->arch.mmu->prev_roots[i].pgd)
== operand.pcid)
- roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
+ vcpu->arch.mmu->prev_roots[i].need_sync = true;

- kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, roots_to_free);
/*
* If neither the current cr3 nor any of the prev_roots use the
* given PCID, then nothing needs to be done here because a
@@ -11743,7 +11741,10 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)

fallthrough;
case INVPCID_TYPE_ALL_INCL_GLOBAL:
- kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
+ kvm_mmu_sync_roots(vcpu);
+ kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+ for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+ vcpu->arch.mmu->prev_roots[i].need_sync = true;
return kvm_skip_emulated_instruction(vcpu);

default:
--
2.19.1.6.gb485710b