Re: [PATCH v2 1/3] KVM: X86: Provides userspace with a capability to not intercept MWAIT

From: Wanpeng Li
Date: Tue Mar 13 2018 - 19:42:07 EST


Hi Jim,
2018-03-14 2:21 GMT+08:00 Jim Mattson <jmattson@xxxxxxxxxx>:
> Is there a need for a new API for yielding MONITOR/MWAIT to the guest?
> Why not just tie this to the guest CPUID.01H:ECX[MWAIT] being set?

The API will also be used by HLT/PAUSE. Please refer to Paolo's
original proposal, though I couldn't find a link to the reply written
by Paolo directly: https://marc.info/?l=kvm&m=151182818103804&w=2

Regards,
Wanpeng Li

>
> On Mon, Mar 12, 2018 at 4:53 AM, Wanpeng Li <kernellwp@xxxxxxxxx> wrote:
>> From: Wanpeng Li <wanpengli@xxxxxxxxxxx>
>>
>> Allowing a guest to execute MWAIT without interception enables a guest
>> to put a (physical) CPU into a power saving state, where it takes
>> longer to return from than what may be desired by the host.
>>
>> Don't give a guest that power over a host by default. (Especially,
>> since nothing prevents a guest from using MWAIT even when it is not
>> advertised via CPUID.)
>>
>> Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
>> Cc: Radim Krčmář <rkrcmar@xxxxxxxxxx>
>> Cc: Jan H. Schönherr <jschoenh@xxxxxxxxx>
>> Signed-off-by: Wanpeng Li <wanpengli@xxxxxxxxxxx>
>> ---
>> Documentation/virtual/kvm/api.txt | 23 ++++++++++++++---------
>> arch/x86/include/asm/kvm_host.h | 2 ++
>> arch/x86/kvm/svm.c | 2 +-
>> arch/x86/kvm/vmx.c | 9 +++++----
>> arch/x86/kvm/x86.c | 24 ++++++++++++++++++++----
>> arch/x86/kvm/x86.h | 10 +++++-----
>> include/uapi/linux/kvm.h | 2 +-
>> tools/include/uapi/linux/kvm.h | 2 +-
>> 8 files changed, 49 insertions(+), 25 deletions(-)
>>
>> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
>> index 98de506..76e5a15 100644
>> --- a/Documentation/virtual/kvm/api.txt
>> +++ b/Documentation/virtual/kvm/api.txt
>> @@ -4358,6 +4358,20 @@ enables QEMU to build error log and branch to guest kernel registered
>> machine check handling routine. Without this capability KVM will
>> branch to guests' 0x200 interrupt vector.
>>
>> +7.13 KVM_CAP_X86_DISABLE_EXITS
>> +
>> +Architectures: x86
>> +Parameters: args[0] defines which exits are disabled
>> +Returns: 0 on success, -EINVAL when args[0] contains invalid exits
>> +
>> +Valid exits in args[0] are
>> +
>> +#define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0)
>> +
>> +Enabling this capability on a VM provides userspace with a way to no
>> +longer intercept some instructions for improved latency in some
>> +workloads.
>> +
>> 8. Other capabilities.
>> ----------------------
>>
>> @@ -4470,15 +4484,6 @@ reserved.
>> Both registers and addresses are 64-bits wide.
>> It will be possible to run 64-bit or 32-bit guest code.
>>
>> -8.8 KVM_CAP_X86_GUEST_MWAIT
>> -
>> -Architectures: x86
>> -
>> -This capability indicates that guest using memory monotoring instructions
>> -(MWAIT/MWAITX) to stop the virtual CPU will not cause a VM exit. As such time
>> -spent while virtual CPU is halted in this way will then be accounted for as
>> -guest running time on the host (as opposed to e.g. HLT).
>> -
>> 8.9 KVM_CAP_ARM_USER_IRQ
>>
>> Architectures: arm, arm64
>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>> index 0395c35..e107171 100644
>> --- a/arch/x86/include/asm/kvm_host.h
>> +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -811,6 +811,8 @@ struct kvm_arch {
>>
>> gpa_t wall_clock;
>>
>> + bool mwait_in_guest;
>> +
>> bool ept_identity_pagetable_done;
>> gpa_t ept_identity_map_addr;
>>
>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>> index be9c839..321b3fd 100644
>> --- a/arch/x86/kvm/svm.c
>> +++ b/arch/x86/kvm/svm.c
>> @@ -1390,7 +1390,7 @@ static void init_vmcb(struct vcpu_svm *svm)
>> set_intercept(svm, INTERCEPT_XSETBV);
>> set_intercept(svm, INTERCEPT_RSM);
>>
>> - if (!kvm_mwait_in_guest()) {
>> + if (!kvm_mwait_in_guest(svm->vcpu.kvm)) {
>> set_intercept(svm, INTERCEPT_MONITOR);
>> set_intercept(svm, INTERCEPT_MWAIT);
>> }
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 6cefd7b..2302ae2 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -3733,13 +3733,11 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
>> CPU_BASED_UNCOND_IO_EXITING |
>> CPU_BASED_MOV_DR_EXITING |
>> CPU_BASED_USE_TSC_OFFSETING |
>> + CPU_BASED_MWAIT_EXITING |
>> + CPU_BASED_MONITOR_EXITING |
>> CPU_BASED_INVLPG_EXITING |
>> CPU_BASED_RDPMC_EXITING;
>>
>> - if (!kvm_mwait_in_guest())
>> - min |= CPU_BASED_MWAIT_EXITING |
>> - CPU_BASED_MONITOR_EXITING;
>> -
>> opt = CPU_BASED_TPR_SHADOW |
>> CPU_BASED_USE_MSR_BITMAPS |
>> CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
>> @@ -5531,6 +5529,9 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
>> exec_control |= CPU_BASED_CR3_STORE_EXITING |
>> CPU_BASED_CR3_LOAD_EXITING |
>> CPU_BASED_INVLPG_EXITING;
>> + if (kvm_mwait_in_guest(vmx->vcpu.kvm))
>> + exec_control &= ~(CPU_BASED_MWAIT_EXITING |
>> + CPU_BASED_MONITOR_EXITING);
>> return exec_control;
>> }
>>
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index 36ef3d8..5fae476 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -2809,9 +2809,15 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
>> return r;
>> }
>>
>> +static inline bool kvm_can_mwait_in_guest(void)
>> +{
>> + return boot_cpu_has(X86_FEATURE_MWAIT) &&
>> + !boot_cpu_has_bug(X86_BUG_MONITOR);
>> +}
>> +
>> int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>> {
>> - int r;
>> + int r = 0;
>>
>> switch (ext) {
>> case KVM_CAP_IRQCHIP:
>> @@ -2867,8 +2873,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>> case KVM_CAP_ADJUST_CLOCK:
>> r = KVM_CLOCK_TSC_STABLE;
>> break;
>> - case KVM_CAP_X86_GUEST_MWAIT:
>> - r = kvm_mwait_in_guest();
>> + case KVM_CAP_X86_DISABLE_EXITS:
>> + if(kvm_can_mwait_in_guest())
>> + r |= KVM_X86_DISABLE_EXITS_MWAIT;
>> break;
>> case KVM_CAP_X86_SMM:
>> /* SMBASE is usually relocated above 1M on modern chipsets,
>> @@ -2909,7 +2916,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>> r = KVM_X2APIC_API_VALID_FLAGS;
>> break;
>> default:
>> - r = 0;
>> break;
>> }
>> return r;
>> @@ -4214,6 +4220,16 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
>>
>> r = 0;
>> break;
>> + case KVM_CAP_X86_DISABLE_EXITS:
>> + r = -EINVAL;
>> + if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS)
>> + break;
>> +
>> + if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) &&
>> + kvm_can_mwait_in_guest())
>> + kvm->arch.mwait_in_guest = true;
>> + r = 0;
>> + break;
>> default:
>> r = -EINVAL;
>> break;
>> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
>> index b91215d..cd1215e 100644
>> --- a/arch/x86/kvm/x86.h
>> +++ b/arch/x86/kvm/x86.h
>> @@ -2,8 +2,6 @@
>> #ifndef ARCH_X86_KVM_X86_H
>> #define ARCH_X86_KVM_X86_H
>>
>> -#include <asm/processor.h>
>> -#include <asm/mwait.h>
>> #include <linux/kvm_host.h>
>> #include <asm/pvclock.h>
>> #include "kvm_cache_regs.h"
>> @@ -264,10 +262,12 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
>> __rem; \
>> })
>>
>> -static inline bool kvm_mwait_in_guest(void)
>> +#define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0)
>> +#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT)
>> +
>> +static inline bool kvm_mwait_in_guest(struct kvm *kvm)
>> {
>> - return boot_cpu_has(X86_FEATURE_MWAIT) &&
>> - !boot_cpu_has_bug(X86_BUG_MONITOR);
>> + return kvm->arch.mwait_in_guest;
>> }
>>
>> #endif
>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>> index 088c2c9..1065006 100644
>> --- a/include/uapi/linux/kvm.h
>> +++ b/include/uapi/linux/kvm.h
>> @@ -929,7 +929,7 @@ struct kvm_ppc_resize_hpt {
>> #define KVM_CAP_S390_GS 140
>> #define KVM_CAP_S390_AIS 141
>> #define KVM_CAP_SPAPR_TCE_VFIO 142
>> -#define KVM_CAP_X86_GUEST_MWAIT 143
>> +#define KVM_CAP_X86_DISABLE_EXITS 143
>> #define KVM_CAP_ARM_USER_IRQ 144
>> #define KVM_CAP_S390_CMMA_MIGRATION 145
>> #define KVM_CAP_PPC_FWNMI 146
>> diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
>> index 0fb5ef9..b13c257 100644
>> --- a/tools/include/uapi/linux/kvm.h
>> +++ b/tools/include/uapi/linux/kvm.h
>> @@ -924,7 +924,7 @@ struct kvm_ppc_resize_hpt {
>> #define KVM_CAP_S390_GS 140
>> #define KVM_CAP_S390_AIS 141
>> #define KVM_CAP_SPAPR_TCE_VFIO 142
>> -#define KVM_CAP_X86_GUEST_MWAIT 143
>> +#define KVM_CAP_X86_DISABLE_EXITS 143
>> #define KVM_CAP_ARM_USER_IRQ 144
>> #define KVM_CAP_S390_CMMA_MIGRATION 145
>> #define KVM_CAP_PPC_FWNMI 146
>> --
>> 2.7.4
>>