Re: [PATCH v2 12/13] KVM: x86: Add variable to control existence of emulator

From: Vitaly Kuznetsov
Date: Wed Feb 26 2020 - 13:01:34 EST


Sean Christopherson <sean.j.christopherson@xxxxxxxxx> writes:

> Add a global variable to control whether or not the emulator is enabled,
> and make all necessary changes to gracefully handle reaching emulation
> paths with the emulator disabled.
>
> Running with VMX's unrestricted guest disabled requires special
> consideration due to its use of kvm_inject_realmode_interrupt(). When
> unrestricted guest is disabled, KVM emulates interrupts and exceptions
> when the processor is in real mode, but does so without going through
> the standard emulator loop. Ideally, kvm_inject_realmode_interrupt()
> would only log the interrupt and defer actual emulation to the standard
> run loop, but that is a non-trivial change and a waste of resources
> given that unrestricted guest is supported on all CPUs shipped within
> the last decade. Similarly, dirtying up the event injection stack for
> such a legacy feature is undesirable. To avoid the conundrum, prevent
> disabling both the emulator and unrestricted guest.
>
> Signed-off-by: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
> ---
> arch/x86/include/asm/kvm_host.h | 2 +-
> arch/x86/kvm/svm.c | 2 +-
> arch/x86/kvm/vmx/vmx.c | 7 ++++++-
> arch/x86/kvm/x86.c | 18 +++++++++++++++---
> 4 files changed, 23 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 0dfe11f30d7f..c4baac32a291 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1050,7 +1050,7 @@ struct kvm_x86_ops {
> int (*hardware_enable)(void);
> void (*hardware_disable)(void);
> int (*check_processor_compatibility)(void);/* __init */
> - int (*hardware_setup)(void); /* __init */
> + int (*hardware_setup)(bool enable_emulator); /* __init */
> void (*hardware_unsetup)(void); /* __exit */
> bool (*cpu_has_accelerated_tpr)(void);
> bool (*has_emulated_msr)(int index);
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index ae62ea454158..810139b3bfe4 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -1350,7 +1350,7 @@ static __init void svm_adjust_mmio_mask(void)
> kvm_mmu_set_mmio_spte_mask(mask, mask, PT_WRITABLE_MASK | PT_USER_MASK);
> }
>
> -static __init int svm_hardware_setup(void)
> +static __init int svm_hardware_setup(bool enable_emulator)
> {
> int cpu;
> struct page *iopm_pages;
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 09bb0d98afeb..e05d36f63b73 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -7548,7 +7548,7 @@ static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
> return to_vmx(vcpu)->nested.vmxon;
> }
>
> -static __init int hardware_setup(void)
> +static __init int hardware_setup(bool enable_emulator)
> {
> unsigned long host_bndcfgs;
> struct desc_ptr dt;
> @@ -7595,6 +7595,11 @@ static __init int hardware_setup(void)
> if (!cpu_has_virtual_nmis())
> enable_vnmi = 0;
>
> + if (!enable_emulator && !enable_unrestricted_guest) {
> + pr_warn("kvm: unrestricted guest disabled, emulator must be enabled\n");
> + return -EIO;
> + }
> +
> /*
> * set_apic_access_page_addr() is used to reload apic access
> * page upon invalidation. No need to do anything if not
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 7bffdc6f9e1b..f9134e1104c2 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -159,6 +159,8 @@ EXPORT_SYMBOL_GPL(enable_vmware_backdoor);
> static bool __read_mostly force_emulation_prefix = false;
> module_param(force_emulation_prefix, bool, S_IRUGO);
>
> +static const bool enable_emulator = true;
> +
> int __read_mostly pi_inject_timer = -1;
> module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR);
>
> @@ -6474,6 +6476,9 @@ void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
> struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
> int ret;
>
> + if (WARN_ON_ONCE(!ctxt))
> + return;
> +
> init_emulate_ctxt(ctxt);
>
> ctxt->op_bytes = 2;
> @@ -6791,6 +6796,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
> bool writeback = true;
> bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
>
> + if (!ctxt)
> + return internal_emulation_error(vcpu);
> +
> vcpu->arch.l1tf_flush_l1d = true;
>
> /*
> @@ -8785,7 +8793,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
>
> static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
> {
> - if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
> + if (vcpu->arch.emulate_regs_need_sync_to_vcpu &&
> + !(WARN_ON_ONCE(!vcpu->arch.emulate_ctxt))) {
> /*
> * We are here if userspace calls get_regs() in the middle of
> * instruction emulation. Registers state needs to be copied
> @@ -8982,6 +8991,9 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
> struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
> int ret;
>
> + if (!ctxt)
> + return internal_emulation_error(vcpu);
> +
> init_emulate_ctxt(ctxt);
>
> ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason,
> @@ -9345,7 +9357,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
> GFP_KERNEL_ACCOUNT))
> goto fail_free_mce_banks;
>
> - if (!alloc_emulate_ctxt(vcpu))
> + if (enable_emulator && !alloc_emulate_ctxt(vcpu))
> goto free_wbinvd_dirty_mask;
>
> vcpu->arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache,
> @@ -9651,7 +9663,7 @@ int kvm_arch_hardware_setup(void)
> {
> int r;
>
> - r = kvm_x86_ops->hardware_setup();
> + r = kvm_x86_ops->hardware_setup(enable_emulator);
> if (r != 0)
> return r;

I'm not sure that emulator disablement should _only_ be a global
varaiable, i.e. why can't we do it for some VMs and allow for others?
(we can even run different userspaces for different needs). This,
however, is complementary.

Reviewed-by: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx>

(I hope we have some userspaces which are already onboard)

--
Vitaly