Skip the Indirect Branch Prediction Barrier that is triggered on a VMCS
switch when running with spectre_v2_user=on/auto if the switch is
between two VMCSes in the same guest, i.e. between vmcs01 and vmcs02.
The IBPB is intended to prevent one guest from attacking another, which
is unnecessary in the nested case as it's the same guest from KVM's
perspective.
This all but eliminates the overhead observed for nested VMX transitions
when running with CONFIG_RETPOLINE=y and spectre_v2_user=on/auto, which
can be significant, e.g. roughly 3x on current systems.
Reported-by: Alexander Graf <graf@xxxxxxxxxx>
Cc: KarimAllah Raslan <karahmed@xxxxxxxxx>
Cc: stable@xxxxxxxxxxxxxxx
Fixes: 15d45071523d ("KVM/x86: Add IBPB support")
Signed-off-by: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
---
v2: Pass a boolean to indicate a nested VMCS switch and instead WARN if
the buddy VMCS is not already loaded. [Alex]
Paolo, feel free to drop the WARN_ON_ONCE() if you think it's overkill.
I'm 50/50 as to whether it's useful or just a waste of cycles. Figured
it'd be easier for you to delete a line of code while applying than to add
and test a new WARN.
arch/x86/kvm/vmx/nested.c | 3 ++-
arch/x86/kvm/vmx/vmx.c | 7 ++++---
arch/x86/kvm/vmx/vmx.h | 2 +-
3 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 2c36f3f53108..b57420f3dd8f 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -302,8 +302,9 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
cpu = get_cpu();
prev = vmx->loaded_vmcs;
+ WARN_ON_ONCE(prev->cpu != cpu || prev->vmcs != per_cpu(current_vmcs, cpu));
vmx->loaded_vmcs = vmcs;
- vmx_vcpu_load_vmcs(vcpu, cpu);
+ vmx_vcpu_load_vmcs(vcpu, cpu, true);
vmx_sync_vmcs_host_state(vmx, prev);
put_cpu();
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 3ab6ca6062ce..d3d57b7a67bd 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1311,7 +1311,7 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
pi_set_on(pi_desc);
}
-void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu)
+void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu, bool nested_switch)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
bool already_loaded = vmx->loaded_vmcs->cpu == cpu;
@@ -1336,7 +1336,8 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu)
if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
vmcs_load(vmx->loaded_vmcs->vmcs);
- indirect_branch_prediction_barrier();
+ if (!nested_switch)
+ indirect_branch_prediction_barrier();
}
if (!already_loaded) {
@@ -1377,7 +1378,7 @@ void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- vmx_vcpu_load_vmcs(vcpu, cpu);
+ vmx_vcpu_load_vmcs(vcpu, cpu, false);
vmx_vcpu_pi_load(vcpu, cpu);
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index b5e773267abe..fa61dc802183 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -320,7 +320,7 @@ struct kvm_vmx {
};
bool nested_vmx_allowed(struct kvm_vcpu *vcpu);
-void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu);
+void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu, bool nested_switch);
void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
int allocate_vpid(void);
void free_vpid(int vpid);
--
2.26.0