[PATCH 09/12] KVM: X86: Add debugfs to inject machine check on VM exit

From: isaku . yamahata
Date: Tue Oct 10 2023 - 04:36:39 EST


From: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>

KVM/x86 handles machine checks in the guest specially. It sets up the
guest so that the vcpu exits from the running guest, checks the exit reason,
and manually raises the machine check by calling do_machine_check().

To test the KVM machine check execution path, KVM needs to inject the
machine check in the context of the vcpu instead of the context of the
process that performs the MCE injection. Wire up the MCE injection framework
for KVM to trigger an MCE in the vcpu context. Add a KVM vcpu debugfs entry
that lets an operator tell KVM to inject an MCE.

The operation flow is as follows:
- Set notrigger to 1 to tell the x86 MCE injector not to inject the
machine check itself.
echo 1 > /sys/kernel/debug/mce-inject/notrigger
- Set MCE parameters via x86 MCE injector debugfs
/sys/kernel/debug/mce-inject/{addr, bank, flags, mcgstatus, misc, status}
- Tell KVM to inject MCE
echo 1 > /sys/kernel/debug/kvm/<pid>-<vm-fd>/vcpu<vcpuid>/mce-inject

Signed-off-by: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>
---
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/debugfs.c | 22 ++++++++++++++++++++++
arch/x86/kvm/x86.c | 14 ++++++++++++++
3 files changed, 37 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 17715cb8731d..9286f3d02f30 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -113,6 +113,7 @@
KVM_ARCH_REQ_FLAGS(31, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_HV_TLB_FLUSH \
KVM_ARCH_REQ_FLAGS(32, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_MCE_INJECT KVM_ARCH_REQ(33)

#define CR0_RESERVED_BITS \
(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c
index ee8c4c3496ed..fee208f30400 100644
--- a/arch/x86/kvm/debugfs.c
+++ b/arch/x86/kvm/debugfs.c
@@ -56,6 +56,22 @@ static int vcpu_get_tsc_scaling_frac_bits(void *data, u64 *val)

DEFINE_SIMPLE_ATTRIBUTE(vcpu_tsc_scaling_frac_fops, vcpu_get_tsc_scaling_frac_bits, NULL, "%llu\n");

+static int vcpu_mce_inject_set(void *data, u64 val)
+{
+ struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (val != 1)
+ return -EINVAL;
+ kvm_make_request(KVM_REQ_MCE_INJECT, vcpu);
+ kvm_vcpu_kick(vcpu);
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(vcpu_mce_inject_fops, NULL, vcpu_mce_inject_set, "%llx\n");
+
void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry)
{
debugfs_create_file("guest_mode", 0444, debugfs_dentry, vcpu,
@@ -76,6 +92,12 @@ void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_
debugfs_dentry, vcpu,
&vcpu_tsc_scaling_frac_fops);
}
+
+ if (IS_ENABLED(CONFIG_X86_MCE_INJECT) &&
+ boot_cpu_has(X86_FEATURE_MCE) && boot_cpu_has(X86_FEATURE_MCA))
+ debugfs_create_file("mce-inject", 0200,
+ debugfs_dentry, vcpu,
+ &vcpu_mce_inject_fops);
}

/*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9f18b06bbda6..e4c63ded4c9a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10496,6 +10496,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
fastpath_t exit_fastpath;

bool req_immediate_exit = false;
+ bool req_mce_inject = false;

if (kvm_request_pending(vcpu)) {
if (kvm_check_request(KVM_REQ_VM_DEAD, vcpu)) {
@@ -10642,6 +10643,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)

if (kvm_check_request(KVM_REQ_UPDATE_CPU_DIRTY_LOGGING, vcpu))
static_call(kvm_x86_update_cpu_dirty_logging)(vcpu);
+
+ req_mce_inject = kvm_check_request(KVM_REQ_MCE_INJECT, vcpu);
}

if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win ||
@@ -10676,6 +10679,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
goto cancel_injection;
}

+ if (unlikely(req_mce_inject))
+ mce_inject_lock();
preempt_disable();

static_call(kvm_x86_prepare_switch_to_guest)(vcpu);
@@ -10721,6 +10726,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
smp_wmb();
local_irq_enable();
preempt_enable();
+ if (unlikely(req_mce_inject)) {
+ kvm_make_request(KVM_REQ_MCE_INJECT, vcpu);
+ mce_inject_unlock();
+ }
kvm_vcpu_srcu_read_lock(vcpu);
r = 1;
goto cancel_injection;
@@ -10814,6 +10823,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
fpu_sync_guest_vmexit_xfd_state();

static_call(kvm_x86_handle_exit_irqoff)(vcpu);
+ if (unlikely(req_mce_inject)) {
+ mce_call_atomic_injector_chain(smp_processor_id());
+ kvm_machine_check();
+ mce_inject_unlock();
+ }

if (vcpu->arch.guest_fpu.xfd_err)
wrmsrl(MSR_IA32_XFD_ERR, 0);
--
2.25.1