[PATCH] KVM: vmx: shadow more fields that are read/written on every vmexits

From: Paolo Bonzini
Date: Wed Dec 13 2017 - 07:14:09 EST


Compared to when VMCS shadowing was added to KVM, we are reading/writing
a few more fields: the PML index, the interrupt status and the preemption
timer value. The first two are because we are exposing more features
to nested guests, the preemption timer is simply because we have grown
a new optimization. Adding them to the shadow VMCS field lists reduces
the cost of a vmexit by about 1000 clock cycles for each field that exists
on bare metal.

On the other hand, the guest BNDCFGS and TSC offset are not written on
fast paths, so remove them.

Suggested-by: Jim Mattson <jmattson@xxxxxxxxxx>
Cc: Jim Mattson <jmattson@xxxxxxxxxx>
Cc: Wanpeng Li <wanpeng.li@xxxxxxxxxxx>
Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
---
arch/x86/kvm/vmx.c | 32 +++++++++++++++++++++++---------
1 file changed, 23 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d2a35a326e47..9fadba5338fd 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -754,11 +754,12 @@ static struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
GUEST_CS_LIMIT,
GUEST_CS_BASE,
GUEST_ES_BASE,
- GUEST_BNDCFGS,
+ GUEST_PML_INDEX,
+ GUEST_INTR_STATUS,
+ VMX_PREEMPTION_TIMER_VALUE,
CR0_GUEST_HOST_MASK,
CR0_READ_SHADOW,
CR4_READ_SHADOW,
- TSC_OFFSET,
EXCEPTION_BITMAP,
CPU_BASED_VM_EXEC_CONTROL,
VM_ENTRY_EXCEPTION_ERROR_CODE,
@@ -4070,9 +4071,22 @@ static void init_vmcs_shadow_fields(void)
/* No checks for read only fields yet */

for (i = j = 0; i < max_shadow_read_write_fields; i++) {
+ /*
+ * PML and the preemption timer can be emulated, but the
+ * processor cannot vmwrite to fields that don't exist
+ * on bare metal.
+ */
switch (shadow_read_write_fields[i]) {
- case GUEST_BNDCFGS:
- if (!kvm_mpx_supported())
+ case GUEST_PML_INDEX:
+ if (!cpu_has_vmx_pml())
+ continue;
+ break;
+ case VMX_PREEMPTION_TIMER_VALUE:
+ if (!cpu_has_vmx_preemption_timer())
+ continue;
+ break;
+ case GUEST_INTR_STATUS:
+ if (!cpu_has_vmx_apicv())
continue;
break;
default:
@@ -7068,11 +7082,6 @@ static __init int hardware_setup(void)
!(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global()))
enable_vpid = 0;

- if (!cpu_has_vmx_shadow_vmcs())
- enable_shadow_vmcs = 0;
- if (enable_shadow_vmcs)
- init_vmcs_shadow_fields();
-
if (!cpu_has_vmx_ept() ||
!cpu_has_vmx_ept_4levels() ||
!cpu_has_vmx_ept_mt_wb() ||
@@ -7192,6 +7201,11 @@ static __init int hardware_setup(void)
kvm_x86_ops->cancel_hv_timer = NULL;
}

+ if (!cpu_has_vmx_shadow_vmcs())
+ enable_shadow_vmcs = 0;
+ if (enable_shadow_vmcs)
+ init_vmcs_shadow_fields();
+
kvm_set_posted_intr_wakeup_handler(wakeup_handler);

kvm_mce_cap_supported |= MCG_LMCE_P;
--
1.8.3.1