[PATCH v13 01/21] KVM: x86: Introduce KVM_{G,S}ET_ONE_REG uAPIs support

From: Chao Gao
Date: Thu Aug 21 2025 - 09:43:05 EST


From: Yang Weijiang <weijiang.yang@xxxxxxxxx>

Enable the KVM_{G,S}ET_ONE_REG uAPIs so that userspace can access HW MSRs
or KVM synthetic MSRs through them.

In the CET KVM series [1], KVM "steals" an MSR from the PV MSR space and
accesses it via the KVM_{G,S}ET_MSRS uAPIs, but that approach pollutes the
PV MSR space and hides the difference between synthetic MSRs and normal
HW-defined MSRs.

Now carve out a separate range in the KVM-customized MSR address space for
synthetic MSRs. The synthetic MSRs are not exposed to userspace via
KVM_GET_MSR_INDEX_LIST; instead, userspace complies with KVM's setup and
composes the uAPI params itself. KVM synthetic MSR indices start from 0 and
increase linearly. The userspace caller must tag the MSR type correctly in
order to access the intended HW or synthetic MSR.

Suggested-by: Sean Christopherson <seanjc@xxxxxxxxxx>
Signed-off-by: Yang Weijiang <weijiang.yang@xxxxxxxxx>
Link: https://lore.kernel.org/all/20240219074733.122080-18-weijiang.yang@xxxxxxxxx/ [1]
Tested-by: Mathias Krause <minipli@xxxxxxxxxxxxxx>
Tested-by: John Allen <john.allen@xxxxxxx>
Tested-by: Rick Edgecombe <rick.p.edgecombe@xxxxxxxxx>
Signed-off-by: Chao Gao <chao.gao@xxxxxxxxx>

---
v13:
- Add vendor and size fields to the register ID to align with other
architectures. (Sean)
- Avoid exposing the struct overlay of the register ID in uAPI
headers (Sean)
- Advertise KVM_CAP_ONE_REG
---
arch/x86/include/uapi/asm/kvm.h | 21 +++++++++
arch/x86/kvm/x86.c | 82 +++++++++++++++++++++++++++++++++
2 files changed, 103 insertions(+)

diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 0f15d683817d..969a63e73190 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -411,6 +411,27 @@ struct kvm_xcrs {
__u64 padding[16];
};

+/* Register types: the index of a register ID names a HW MSR or a KVM synthetic MSR. */
+#define KVM_X86_REG_TYPE_MSR 2
+#define KVM_X86_REG_TYPE_SYNTHETIC_MSR 3
+
+/* @type shifted to bit 32, OR'd with the size that type implies (no size bits if unknown). */
+#define KVM_X86_REG_TYPE_SIZE(type) \
+({ \
+	__u64 type_size = (__u64)(type) << 32; \
+ \
+	type_size |= (type) == KVM_X86_REG_TYPE_MSR ? KVM_REG_SIZE_U64 : \
+		     (type) == KVM_X86_REG_TYPE_SYNTHETIC_MSR ? KVM_REG_SIZE_U64 : 0; \
+	type_size; \
+})
+
+/* Compose a full register ID; arguments are parenthesized so compound expressions are safe. */
+#define KVM_X86_REG_ENCODE(type, index) \
+	(KVM_REG_X86 | KVM_X86_REG_TYPE_SIZE(type) | (index))
+
+#define KVM_X86_REG_MSR(index) KVM_X86_REG_ENCODE(KVM_X86_REG_TYPE_MSR, index)
+#define KVM_X86_REG_SYNTHETIC_MSR(index) KVM_X86_REG_ENCODE(KVM_X86_REG_TYPE_SYNTHETIC_MSR, index)
+
#define KVM_SYNC_X86_REGS (1UL << 0)
#define KVM_SYNC_X86_SREGS (1UL << 1)
#define KVM_SYNC_X86_EVENTS (1UL << 2)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7ba2cdfdac44..31a7e7ad310a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2254,6 +2254,31 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
return kvm_set_msr_ignored_check(vcpu, index, *data, true);
}

+/* Read MSR @msr via do_get_msr() and copy the 64-bit result to userspace at @value. */
+static int kvm_get_one_msr(struct kvm_vcpu *vcpu, u32 msr, u64 __user *value)
+{
+	u64 data;
+	int ret;
+
+	/* Any non-zero result from do_get_msr() is handed back unchanged. */
+	ret = do_get_msr(vcpu, msr, &data);
+	if (ret != 0)
+		return ret;
+
+	/* A failed store through the user pointer is reported as -EFAULT. */
+	return put_user(data, value) ? -EFAULT : 0;
+}
+
+/* Fetch a 64-bit value from userspace at @value and pass it to do_set_msr() for @msr. */
+static int kvm_set_one_msr(struct kvm_vcpu *vcpu, u32 msr, u64 __user *value)
+{
+	u64 data;
+
+	/* If the user pointer cannot be read, fail with -EFAULT without calling do_set_msr(). */
+	return get_user(data, value) ? -EFAULT :
+				       do_set_msr(vcpu, msr, &data);
+}
+
#ifdef CONFIG_X86_64
struct pvclock_clock {
int vclock_mode;
@@ -4737,6 +4762,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_IRQFD_RESAMPLE:
case KVM_CAP_MEMORY_FAULT_INFO:
case KVM_CAP_X86_GUEST_MODE:
+ case KVM_CAP_ONE_REG:
r = 1;
break;
case KVM_CAP_PRE_FAULT_MEMORY:
@@ -5915,6 +5941,20 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
}
}

+struct kvm_x86_reg_id { /* field view of the 64-bit kvm_one_reg.id; the ONE_REG handler casts &reg.id to this */
+ __u32 index; /* register index; passed as the MSR number when type is KVM_X86_REG_TYPE_MSR */
+ __u8 type; /* compared against KVM_X86_REG_TYPE_* by the ONE_REG handler */
+ __u8 rsvd; /* handler fails with -EINVAL unless zero */
+ __u8 rsvd4:4; /* handler fails with -EINVAL unless zero */
+ __u8 size:4; /* presumably the bits covered by KVM_REG_SIZE_MASK - TODO confirm */
+ __u8 x86; /* presumably the architecture byte matched via KVM_REG_ARCH_MASK - TODO confirm */
+};
+
+static int kvm_translate_synthetic_msr(struct kvm_x86_reg_id *reg)
+{
+ return -EINVAL; /* always fails: no translation is implemented here, @reg is left untouched */
+}
+
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -6031,6 +6071,48 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
}
+ case KVM_GET_ONE_REG:
+ case KVM_SET_ONE_REG: { /* shared path: validate the register ID, then read or write one MSR */
+ struct kvm_x86_reg_id *id; /* field view of reg.id, set by the cast below */
+ struct kvm_one_reg reg;
+ u64 __user *value;
+
+ r = -EFAULT;
+ if (copy_from_user(&reg, argp, sizeof(reg)))
+ break;
+
+ r = -EINVAL; /* result for each validation failure below */
+ if ((reg.id & KVM_REG_ARCH_MASK) != KVM_REG_X86)
+ break;
+
+ id = (struct kvm_x86_reg_id *)&reg.id;
+ if (id->rsvd || id->rsvd4) /* reserved fields must be zero */
+ break;
+
+ if (id->type != KVM_X86_REG_TYPE_MSR &&
+ id->type != KVM_X86_REG_TYPE_SYNTHETIC_MSR)
+ break;
+
+ if ((reg.id & KVM_REG_SIZE_MASK) != KVM_REG_SIZE_U64) /* only 64-bit registers are accepted */
+ break;
+
+ if (id->type == KVM_X86_REG_TYPE_SYNTHETIC_MSR) {
+ r = kvm_translate_synthetic_msr(id); /* a non-zero result becomes the ioctl result */
+ if (r)
+ break;
+ }
+
+ r = -EINVAL; /* a successful translation left r == 0; restore the error default */
+ if (id->type != KVM_X86_REG_TYPE_MSR) /* NOTE(review): presumably translation retags id as a plain MSR on success - confirm */
+ break;
+
+ value = u64_to_user_ptr(reg.addr);
+ if (ioctl == KVM_GET_ONE_REG)
+ r = kvm_get_one_msr(vcpu, id->index, value);
+ else
+ r = kvm_set_one_msr(vcpu, id->index, value);
+ break;
+ }
case KVM_TPR_ACCESS_REPORTING: {
struct kvm_tpr_access_ctl tac;

--
2.47.3