Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL

From: David Woodhouse
Date: Mon Jan 29 2018 - 14:01:29 EST



(Top-posting; sorry.)

Much of that is already fixed during our day, in
http://git.infradead.org/linux-retpoline.git/shortlog/refs/heads/ibpb

I forgot to fix up the wrong-MSR typo though, and we do still need to address reset.

On Mon, 2018-01-29 at 10:43 -0800, Jim Mattson wrote:
> On Sun, Jan 28, 2018 at 11:29 AM, KarimAllah Ahmed wrote:
> >
> > Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for guests
> > that will only mitigate Spectre V2 through IBRS+IBPB and will not be using a
> > retpoline+IBPB based approach.
> >
> > To avoid the overhead of atomically saving and restoring the MSR_IA32_SPEC_CTRL
> > for guests that do not actually use the MSR, only add_atomic_switch_msr when a
> > non-zero is written to it.
> >
> > Cc: Asit Mallick <asit.k.mallick@xxxxxxxxx>
> > Cc: Arjan Van De Ven <arjan.van.de.ven@xxxxxxxxx>
> > Cc: Dave Hansen <dave.hansen@xxxxxxxxx>
> > Cc: Andi Kleen <ak@xxxxxxxxxxxxxxx>
> > Cc: Andrea Arcangeli <aarcange@xxxxxxxxxx>
> > Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
> > Cc: Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx>
> > Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> > Cc: Dan Williams <dan.j.williams@xxxxxxxxx>
> > Cc: Jun Nakajima <jun.nakajima@xxxxxxxxx>
> > Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> > Cc: David Woodhouse <dwmw@xxxxxxxxxxxx>
> > Cc: Greg KH <gregkh@xxxxxxxxxxxxxxxxxxx>
> > Cc: Andy Lutomirski <luto@xxxxxxxxxx>
> > Signed-off-by: KarimAllah Ahmed <karahmed@xxxxxxxxx>
> > Signed-off-by: Ashok Raj <ashok.raj@xxxxxxxxx>
> > ---
> >  arch/x86/kvm/cpuid.c |  4 +++-
> >  arch/x86/kvm/cpuid.h |  1 +
> >  arch/x86/kvm/vmx.c   | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 67 insertions(+), 1 deletion(-)
> >
> > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> > index 0099e10..dc78095 100644
> > --- a/arch/x86/kvm/cpuid.c
> > +++ b/arch/x86/kvm/cpuid.c
> > @@ -70,6 +70,7 @@ u64 kvm_supported_xcr0(void)
> >  /* These are scattered features in cpufeatures.h. */
> >  #define KVM_CPUID_BIT_AVX512_4VNNIW     2
> >  #define KVM_CPUID_BIT_AVX512_4FMAPS     3
> > +#define KVM_CPUID_BIT_SPEC_CTRL         26
> >  #define KF(x) bit(KVM_CPUID_BIT_##x)
> >
> >  int kvm_update_cpuid(struct kvm_vcpu *vcpu)
> > @@ -392,7 +393,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
> >
> >         /* cpuid 7.0.edx*/
> >         const u32 kvm_cpuid_7_0_edx_x86_features =
> > -               KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS);
> > +               KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS) | \
> > +               (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ? KF(SPEC_CTRL) : 0);
> Isn't 'boot_cpu_has()' superflous here? And aren't there two bits to
> pass through for existing CPUs (26 and 27)?
>
> >
> >
> >         /* all calls to cpuid_count() should be made on the same cpu */
> >         get_cpu();
> > diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
> > index cdc70a3..dcfe227 100644
> > --- a/arch/x86/kvm/cpuid.h
> > +++ b/arch/x86/kvm/cpuid.h
> > @@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
> >         [CPUID_8000_000A_EDX] = {0x8000000a, 0, CPUID_EDX},
> >         [CPUID_7_ECX]         = {         7, 0, CPUID_ECX},
> >         [CPUID_8000_0007_EBX] = {0x80000007, 0, CPUID_EBX},
> > +       [CPUID_7_EDX]         = {         7, 0, CPUID_EDX},
> >  };
> >
> >  static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned x86_feature)
> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> > index aa8638a..1b743a0 100644
> > --- a/arch/x86/kvm/vmx.c
> > +++ b/arch/x86/kvm/vmx.c
> > @@ -920,6 +920,9 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked);
> >  static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
> >                                             u16 error_code);
> >  static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
> > +static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
> > +                                                          u32 msr, int type);
> > +
> >
> >  static DEFINE_PER_CPU(struct vmcs *, vmxarea);
> >  static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
> > @@ -2007,6 +2010,28 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
> >         m->host[i].value = host_val;
> >  }
> >
> > +/* do not touch guest_val and host_val if the msr is not found */
> > +static int read_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
> > +                                  u64 *guest_val, u64 *host_val)
> > +{
> > +       unsigned i;
> > +       struct msr_autoload *m = &vmx->msr_autoload;
> > +
> > +       for (i = 0; i < m->nr; ++i)
> > +               if (m->guest[i].index == msr)
> > +                       break;
> > +
> > +       if (i == m->nr)
> > +               return 1;
> > +
> > +       if (guest_val)
> > +               *guest_val = m->guest[i].value;
> > +       if (host_val)
> > +               *host_val = m->host[i].value;
> > +
> > +       return 0;
> > +}
> > +
> >  static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
> >  {
> >         u64 guest_efer = vmx->vcpu.arch.efer;
> > @@ -3203,7 +3228,9 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
> >  */
> >  static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> >  {
> > +       u64 spec_ctrl = 0;
> >         struct shared_msr_entry *msr;
> > +       struct vcpu_vmx *vmx = to_vmx(vcpu);
> >
> >         switch (msr_info->index) {
> >  #ifdef CONFIG_X86_64
> > @@ -3223,6 +3250,19 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> >         case MSR_IA32_TSC:
> >                 msr_info->data = guest_read_tsc(vcpu);
> >                 break;
> > +       case MSR_IA32_SPEC_CTRL:
> > +               if (!msr_info->host_initiated &&
> > +                   !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
> Shouldn't this conjunct be:
> !(guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) ||
> guest_cpuid_has(vcpu, X86_FEATURE_STIBP))?
>
> >
> > +                       return 1;
> What if !boot_cpu_has(X86_FEATURE_SPEC_CTRL) &&
> !boot_cpu_has(X86_FEATURE_STIBP)? That should also return 1, I think.
>
> >
> > +
> > +               /*
> > +                * If the MSR is not in the atomic list yet, then it was never
> > +                * written to. So the MSR value will be '0'.
> > +                */
> > +               read_atomic_switch_msr(vmx, MSR_IA32_SPEC_CTRL, &spec_ctrl, NULL);
> Why not just add msr_ia32_spec_ctrl to struct vcpu_vmx, so that you
> don't have to search the atomic switch list?
>
> >
> > +
> > +               msr_info->data = spec_ctrl;
> > +               break;
> >         case MSR_IA32_SYSENTER_CS:
> >                 msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
> >                 break;
> > @@ -3289,6 +3329,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> >         int ret = 0;
> >         u32 msr_index = msr_info->index;
> >         u64 data = msr_info->data;
> > +       unsigned long *msr_bitmap;
> > +
> > +       /*
> > +        * IBRS is not used (yet) to protect the host. Once it does, this
> > +        * variable needs to be a bit smarter.
> > +        */
> > +       u64 host_spec_ctrl = 0;
> >
> >         switch (msr_index) {
> >         case MSR_EFER:
> > @@ -3330,6 +3377,22 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> >         case MSR_IA32_TSC:
> >                 kvm_write_tsc(vcpu, msr_info);
> >                 break;
> > +       case MSR_IA32_SPEC_CTRL:
> > +               if (!msr_info->host_initiated &&
> > +                   !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
> > +                       return 1;
> This looks incomplete. As above, what if
> !boot_cpu_has(X86_FEATURE_SPEC_CTRL) &&
> !boot_cpu_has(X86_FEATURE_STIBP)?
> If the host doesn't support MSR_IA32_SPEC_CTRL, you'll get a VMX-abort
> on loading the host MSRs from the VM-exit MSR load list.
>
> Also, what if the value being written is illegal?
>
> /*
> * Processors that support IBRS but not STIBP
> * (CPUID.(EAX=07H, ECX=0):EDX[27:26] = 01b) will
> * ignore attempts to set STIBP instead of causing an
> * exception due to setting that reserved bit.
> */
> if ((data & ~(u64)(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) ||
>     ((data & SPEC_CTRL_IBRS) &&
>      !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)))
> return 1;
>
> >
> > +
> > +               /*
> > +                * Now we know that the guest is actually using the MSR, so
> > +                * atomically load and save the SPEC_CTRL MSR and pass it
> > +                * through to the guest.
> > +                */
> > +               add_atomic_switch_msr(vmx, MSR_IA32_SPEC_CTRL, msr_info->data,
> > +                                     host_spec_ctrl);
> > +               msr_bitmap = vmx->vmcs01.msr_bitmap;
> > +               vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW);
> I assume you mean MSR_IA32_SPEC_CTRL rather than MSR_FS_BASE.
>
> Also, what if the host and the guest support a different set of bits
> in MSR_IA32_SPEC_CTRL, due to a userspace modification of the guest's
> CPUID info?
>
> >
> > +
> > +               break;
> >         case MSR_IA32_CR_PAT:
> >                 if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
> >                         if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
> > --
> > 2.7.4
> >
> Where do you preserve the guest's MSR_IA32_SPEC_CTRL value on VM-exit,
> if the guest has been given permission to write the MSR?
>
> You also have to clear the guest's MSR_IA32_SPEC_CTRL on
> vmx_vcpu_reset, don't you?
>

Attachment: smime.p7s
Description: S/MIME cryptographic signature