Re: [RFC 02/10] x86/kvm: Add IBPB support

From: Jim Mattson
Date: Mon Jan 22 2018 - 14:31:24 EST


Oh, but to do that properly, you need one of the per-vCPU bitmap
implementations that Paolo and I have independently posted.

On Mon, Jan 22, 2018 at 10:56 AM, Jim Mattson <jmattson@xxxxxxxxxx> wrote:
> On Sat, Jan 20, 2018 at 11:22 AM, KarimAllah Ahmed <karahmed@xxxxxxxxx> wrote:
>> From: Ashok Raj <ashok.raj@xxxxxxxxx>
>>
>> Add MSR passthrough for MSR_IA32_PRED_CMD and place branch predictor
>> barriers on switching between VMs to avoid inter-VM Spectre-v2 attacks.
>>
>> [peterz: rebase and changelog rewrite]
>> [dwmw2: fixes]
>> [karahmed: - vmx: expose PRED_CMD whenever it is available
>> - svm: only pass through IBPB if it is available]
>>
>> Cc: Asit Mallick <asit.k.mallick@xxxxxxxxx>
>> Cc: Dave Hansen <dave.hansen@xxxxxxxxx>
>> Cc: Arjan Van De Ven <arjan.van.de.ven@xxxxxxxxx>
>> Cc: Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx>
>> Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
>> Cc: Andrea Arcangeli <aarcange@xxxxxxxxxx>
>> Cc: Andi Kleen <ak@xxxxxxxxxxxxxxx>
>> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
>> Cc: Dan Williams <dan.j.williams@xxxxxxxxx>
>> Cc: Jun Nakajima <jun.nakajima@xxxxxxxxx>
>> Cc: Andy Lutomirski <luto@xxxxxxxxxx>
>> Cc: Greg KH <gregkh@xxxxxxxxxxxxxxxxxxx>
>> Cc: David Woodhouse <dwmw@xxxxxxxxxxxx>
>> Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
>> Signed-off-by: Ashok Raj <ashok.raj@xxxxxxxxx>
>> Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
>> Link: http://lkml.kernel.org/r/1515720739-43819-6-git-send-email-ashok.raj@xxxxxxxxx
>>
>> Signed-off-by: David Woodhouse <dwmw@xxxxxxxxxxxx>
>> Signed-off-by: KarimAllah Ahmed <karahmed@xxxxxxxxx>
>> ---
>> arch/x86/kvm/svm.c | 14 ++++++++++++++
>> arch/x86/kvm/vmx.c | 4 ++++
>> 2 files changed, 18 insertions(+)
>>
>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>> index 2744b973..cfdb9ab 100644
>> --- a/arch/x86/kvm/svm.c
>> +++ b/arch/x86/kvm/svm.c
>> @@ -529,6 +529,7 @@ struct svm_cpu_data {
>> struct kvm_ldttss_desc *tss_desc;
>>
>> struct page *save_area;
>> + struct vmcb *current_vmcb;
>> };
>>
>> static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
>> @@ -918,6 +919,9 @@ static void svm_vcpu_init_msrpm(u32 *msrpm)
>>
>> set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1);
>> }
>> +
>> + if (boot_cpu_has(X86_FEATURE_AMD_PRED_CMD))
>> + set_msr_interception(msrpm, MSR_IA32_PRED_CMD, 1, 1);
>> }
>>
>> static void add_msr_offset(u32 offset)
>> @@ -1706,11 +1710,17 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
>> __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
>> kvm_vcpu_uninit(vcpu);
>> kmem_cache_free(kvm_vcpu_cache, svm);
>> + /*
>> + * The vmcb page can be recycled, causing a false negative in
>> + * svm_vcpu_load(). So do a full IBPB now.
>> + */
>> + indirect_branch_prediction_barrier();
>> }
>>
>> static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>> {
>> struct vcpu_svm *svm = to_svm(vcpu);
>> + struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
>> int i;
>>
>> if (unlikely(cpu != vcpu->cpu)) {
>> @@ -1739,6 +1749,10 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>> if (static_cpu_has(X86_FEATURE_RDTSCP))
>> wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
>>
>> + if (sd->current_vmcb != svm->vmcb) {
>> + sd->current_vmcb = svm->vmcb;
>> + indirect_branch_prediction_barrier();
>> + }
>> avic_vcpu_load(vcpu, cpu);
>> }
>>
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index d1e25db..3b64de2 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -2279,6 +2279,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>> if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
>> per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
>> vmcs_load(vmx->loaded_vmcs->vmcs);
>> + indirect_branch_prediction_barrier();
>> }
>>
>> if (!already_loaded) {
>> @@ -6791,6 +6792,9 @@ static __init int hardware_setup(void)
>> kvm_tsc_scaling_ratio_frac_bits = 48;
>> }
>>
>> + if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
>
> I think the condition here should be:
>
> if (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
>
> __do_cpuid_ent should pass through X86_FEATURE_SPEC_CTRL from the
> host, but userspace should be allowed to clear it.
> (Userspace should not be allowed to set it if the host doesn't support it.)
>
>> + vmx_disable_intercept_for_msr(MSR_IA32_PRED_CMD, false);
>> +
>> vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
>> vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
>> vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
>> --
>> 2.7.4
>>