[PATCH v3 3/6] KVM: nVMX: add KVM_CAP_HYPERV_ENLIGHTENED_VMCS capability

From: Vitaly Kuznetsov
Date: Wed Jul 25 2018 - 12:31:15 EST


Enlightened VMCS is opt-in. The current version does not contain all
fields supported by nested VMX so we must not advertise the
corresponding VMX features if enlightened VMCS is enabled.

Userspace is given the enlightened VMCS version supported by KVM as
part of enabling KVM_CAP_HYPERV_ENLIGHTENED_VMCS. The version is to
be advertised to the nested hypervisor, currently done via a cpuid
leaf for Hyper-V.

Suggested-by: Ladi Prosek <lprosek@xxxxxxxxxx>
Signed-off-by: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx>
Reviewed-by: Liran Alon <liran.alon@xxxxxxxxxx>
---
arch/x86/include/asm/kvm_host.h | 3 +++
arch/x86/kvm/svm.c | 9 +++++++++
arch/x86/kvm/vmx.c | 37 +++++++++++++++++++++++++++++++++++++
arch/x86/kvm/x86.c | 15 +++++++++++++++
include/uapi/linux/kvm.h | 1 +
5 files changed, 65 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 150937e64f63..065d1841b7e8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1124,6 +1124,9 @@ struct kvm_x86_ops {
int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp);

int (*get_msr_feature)(struct kvm_msr_entry *entry);
+
+ int (*nested_enable_evmcs)(struct kvm_vcpu *vcpu,
+ uint16_t *vmcs_version);
};

struct kvm_arch_async_pf {
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 73e27a98456f..cadcfed2d9d9 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -7038,6 +7038,13 @@ static int svm_unregister_enc_region(struct kvm *kvm,
return ret;
}

+static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
+ uint16_t *vmcs_version)
+{
+ /* Intel-only feature */
+ return -ENODEV;
+}
+
static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
@@ -7165,6 +7172,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.mem_enc_op = svm_mem_enc_op,
.mem_enc_reg_region = svm_register_enc_region,
.mem_enc_unreg_region = svm_unregister_enc_region,
+
+ .nested_enable_evmcs = nested_enable_evmcs,
};

static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b47f92c79147..7871aa00cd6d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -668,6 +668,13 @@ struct nested_vmx {

bool change_vmcs01_virtual_apic_mode;

+ /*
+ * Enlightened VMCS has been enabled. It does not mean that L1 has to
+ * use it. However, VMX features available to L1 will be limited based
+ * on what the enlightened VMCS supports.
+ */
+ bool enlightened_vmcs_enabled;
+
/* L2 must run next, and mustn't decide to exit to L1. */
bool nested_run_pending;

@@ -1365,6 +1372,34 @@ static inline void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) {}
static inline void evmcs_touch_msr_bitmap(void) {}
#endif /* IS_ENABLED(CONFIG_HYPERV) */

+static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
+ uint16_t *vmcs_version)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ /* We don't support disabling the feature for simplicity. */
+ if (vmx->nested.enlightened_vmcs_enabled)
+ return 0;
+
+ vmx->nested.enlightened_vmcs_enabled = true;
+
+ /*
+ * vmcs_version represents the range of supported Enlightened VMCS
+ * versions: lower 8 bits is the minimal version, higher 8 bits is the
+ * maximum supported version. KVM supports versions from 1 to
+ * KVM_EVMCS_VERSION.
+ */
+ *vmcs_version = (KVM_EVMCS_VERSION << 8) | 1;
+
+ vmx->nested.msrs.pinbased_ctls_high &= ~EVMCS1_UNSUPPORTED_PINCTRL;
+ vmx->nested.msrs.entry_ctls_high &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
+ vmx->nested.msrs.exit_ctls_high &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
+ vmx->nested.msrs.secondary_ctls_high &= ~EVMCS1_UNSUPPORTED_2NDEXEC;
+ vmx->nested.msrs.vmfunc_controls &= ~EVMCS1_UNSUPPORTED_VMFUNC;
+
+ return 0;
+}
+
static inline bool is_exception_n(u32 intr_info, u8 vector)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
@@ -13734,6 +13769,8 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.pre_enter_smm = vmx_pre_enter_smm,
.pre_leave_smm = vmx_pre_leave_smm,
.enable_smi_window = enable_smi_window,
+
+ .nested_enable_evmcs = nested_enable_evmcs,
};

static int __init vmx_init(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 20b9745aebda..99583d84b90d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2882,6 +2882,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_HYPERV_VP_INDEX:
case KVM_CAP_HYPERV_EVENTFD:
case KVM_CAP_HYPERV_TLBFLUSH:
+ case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
case KVM_CAP_PCI_SEGMENT:
case KVM_CAP_DEBUGREGS:
case KVM_CAP_X86_ROBUST_SINGLESTEP:
@@ -3662,6 +3663,10 @@ static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
struct kvm_enable_cap *cap)
{
+ int r;
+ uint16_t vmcs_version;
+ void __user *user_ptr;
+
if (cap->flags)
return -EINVAL;

@@ -3674,6 +3679,16 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
return -EINVAL;
return kvm_hv_activate_synic(vcpu, cap->cap ==
KVM_CAP_HYPERV_SYNIC2);
+ case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
+ r = kvm_x86_ops->nested_enable_evmcs(vcpu, &vmcs_version);
+ if (!r) {
+ user_ptr = (void __user *)(uintptr_t)cap->args[0];
+ if (copy_to_user(user_ptr, &vmcs_version,
+ sizeof(vmcs_version)))
+ r = -EFAULT;
+ }
+ return r;
+
default:
return -EINVAL;
}
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index c2e91c28e09d..5c737ec724e3 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -950,6 +950,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_HYPERV_EVENTFD 154
#define KVM_CAP_HYPERV_TLBFLUSH 155
#define KVM_CAP_NESTED_STATE 156
+#define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 157

#ifdef KVM_CAP_IRQ_ROUTING

--
2.14.4