[PATCH v3 1/2] KVM: x86: Add a framework for supporting MSR-based features

From: Tom Lendacky
Date: Wed Feb 21 2018 - 14:40:04 EST


Provide a new KVM capability that allows bits within MSRs to be recognized
as features. Two new ioctls are added to the VM ioctl routine to retrieve
the list of these MSRs and then retrieve their values. An x86_kvm_ops
callback is used to determine support for the listed MSR-based features.

Signed-off-by: Tom Lendacky <thomas.lendacky@xxxxxxx>
---
Documentation/virtual/kvm/api.txt | 29 +++++++++++----
arch/x86/include/asm/kvm_host.h | 2 +
arch/x86/kvm/x86.c | 71 +++++++++++++++++++++++++++++++++++--
include/uapi/linux/kvm.h | 1 +
4 files changed, 92 insertions(+), 11 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 792fa87..613f346 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -127,10 +127,11 @@ flag KVM_VM_MIPS_VZ.

Capability: basic
Architectures: x86
-Type: system
+Type: system ioctl, vm ioctl
Parameters: struct kvm_msr_list (in/out)
Returns: 0 on success; -1 on error
Errors:
+ EFAULT: the msr index list cannot be read from or written to
E2BIG: the msr index list is to be to fit in the array specified by
the user.

@@ -139,16 +140,22 @@ struct kvm_msr_list {
__u32 indices[0];
};

+The user fills in the size of the indices array in nmsrs, and in return
+kvm adjusts nmsrs to reflect the actual number of msrs and fills in the
+indices array with their numbers.
+
+When used in a system ioctl:
This ioctl returns the guest msrs that are supported. The list varies
-by kvm version and host processor, but does not change otherwise. The
-user fills in the size of the indices array in nmsrs, and in return
-kvm adjusts nmsrs to reflect the actual number of msrs and fills in
-the indices array with their numbers.
+by kvm version and host processor, but does not change otherwise.

Note: if kvm indicates supports MCE (KVM_CAP_MCE), then the MCE bank MSRs are
not returned in the MSR list, as different vcpus can have a different number
of banks, as set via the KVM_X86_SETUP_MCE ioctl.

+When used in a vm ioctl:
+This ioctl returns the msrs that represent possible supported features.
+This list varies by kvm version and host processor.
+

4.4 KVM_CHECK_EXTENSION

@@ -477,12 +484,18 @@ Support for this has been removed. Use KVM_SET_GUEST_DEBUG instead.

Capability: basic
Architectures: x86
-Type: vcpu ioctl
+Type: vm ioctl, vcpu ioctl
Parameters: struct kvm_msrs (in/out)
-Returns: 0 on success, -1 on error
+Returns: number of msrs successfully returned;
+ -1 on error
+
+When used in a vm ioctl:
+Reads the values of msr-based features for the VM. The list of msr-based
+features can be obtained using KVM_GET_MSR_INDEX_LIST in a vm ioctl.

+When used in a vcpu ioctl:
Reads model-specific registers from the vcpu. Supported msr indices can
-be obtained using KVM_GET_MSR_INDEX_LIST.
+be obtained using KVM_GET_MSR_INDEX_LIST in a system ioctl.

struct kvm_msrs {
__u32 nmsrs; /* number of msrs in entries */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index dd6f57a..e466bce 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1095,6 +1095,8 @@ struct kvm_x86_ops {
int (*mem_enc_op)(struct kvm *kvm, void __user *argp);
int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
+
+ int (*msr_feature)(struct kvm_msr_entry *entry);
};

struct kvm_arch_async_pf {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c8a0b54..80ac039 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1049,6 +1049,28 @@ bool kvm_rdpmc(struct kvm_vcpu *vcpu)

static unsigned num_emulated_msrs;

+/*
+ * List of msr numbers which are used to expose MSR-based features that
+ * can be used by a hypervisor to validate requested CPU features.
+ */
+static u32 msr_based_features[] = {
+};
+
+static unsigned int num_msr_based_features;
+
+static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
+{
+ struct kvm_msr_entry msr;
+
+ msr.index = index;
+ if (!kvm_x86_ops->msr_feature || kvm_x86_ops->msr_feature(&msr))
+ return 1;
+
+ *data = msr.data;
+
+ return 0;
+}
+
bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
{
if (efer & efer_reserved_bits)
@@ -2680,13 +2702,17 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
int (*do_msr)(struct kvm_vcpu *vcpu,
unsigned index, u64 *data))
{
- int i, idx;
+ int i, idx = 0;
+
+ if (vcpu)
+ idx = srcu_read_lock(&vcpu->kvm->srcu);

- idx = srcu_read_lock(&vcpu->kvm->srcu);
for (i = 0; i < msrs->nmsrs; ++i)
if (do_msr(vcpu, entries[i].index, &entries[i].data))
break;
- srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+ if (vcpu)
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);

return i;
}
@@ -2785,6 +2811,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_SET_BOOT_CPU_ID:
case KVM_CAP_SPLIT_IRQCHIP:
case KVM_CAP_IMMEDIATE_EXIT:
+ case KVM_CAP_GET_MSR_FEATURES:
r = 1;
break;
case KVM_CAP_ADJUST_CLOCK:
@@ -4410,6 +4437,31 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = kvm_x86_ops->mem_enc_unreg_region(kvm, &region);
break;
}
+ case KVM_GET_MSR_INDEX_LIST: {
+ struct kvm_msr_list __user *user_msr_list = argp;
+ struct kvm_msr_list msr_list;
+ unsigned int n;
+
+ r = -EFAULT;
+ if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list)))
+ goto out;
+ n = msr_list.nmsrs;
+ msr_list.nmsrs = num_msr_based_features;
+ if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list)))
+ goto out;
+ r = -E2BIG;
+ if (n < msr_list.nmsrs)
+ goto out;
+ r = -EFAULT;
+ if (copy_to_user(user_msr_list->indices, &msr_based_features,
+ num_msr_based_features * sizeof(u32)))
+ goto out;
+ r = 0;
+ break;
+ }
+ case KVM_GET_MSRS:
+ r = msr_io(NULL, argp, do_get_msr_feature, 1);
+ break;
default:
r = -ENOTTY;
}
@@ -4464,6 +4516,19 @@ static void kvm_init_msr_list(void)
j++;
}
num_emulated_msrs = j;
+
+ for (i = j = 0; i < ARRAY_SIZE(msr_based_features); i++) {
+ struct kvm_msr_entry msr;
+
+ msr.index = msr_based_features[i];
+ if (!kvm_x86_ops->msr_feature || kvm_x86_ops->msr_feature(&msr))
+ continue;
+
+ if (j < i)
+ msr_based_features[j] = msr_based_features[i];
+ j++;
+ }
+ num_msr_based_features = j;
}

static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 0fb5ef9..429784c 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -934,6 +934,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_S390_AIS_MIGRATION 150
#define KVM_CAP_PPC_GET_CPU_CHAR 151
#define KVM_CAP_S390_BPB 152
+#define KVM_CAP_GET_MSR_FEATURES 153

#ifdef KVM_CAP_IRQ_ROUTING