[PATCH v3 2/4] x86/mce/inject: Avoid racy updates to MSR_K7_HWCR during MCE injection

From: Jim Mattson

Date: Thu Jun 18 2026 - 18:46:31 EST


MCE injection sets and clears McStatusWrEn (bit 18) in MSR_K7_HWCR around
the actual MSR writes (prepare_msrs()). Each toggle, done via
toggle_hw_mce_inject(), is a read-modify-write split across two
independent crosscalls (rdmsrq_on_cpu() followed by wrmsrq_on_cpu()).
Another HWCR update on the target CPU could be lost if it occurred between
these crosscalls.

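Introduce ipi_inject_mce() to perform the entire injection sequence (toggle
ON, write MSRs, toggle OFF) in a single IPI callback, ensuring atomicity on
the target CPU. The IPI is now issued with wait=1 and its return value is
checked, so a failed crosscall is reported and the injection is aborted.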

For the local CPU initialization path in check_hw_inj_possible(), use
amd_update_hwcr() directly to avoid IPI overhead and ensure safe updates.

Remove toggle_hw_mce_inject() as it is no longer used.

Opportunistically, replace the open-coded BIT(18) with the new
MSR_K7_HWCR_MCSTATUSWREN_BIT and MSR_K7_HWCR_MCSTATUSWREN macros.

Fixes: 21690934d934 ("EDAC, mce_amd_inj: Enable direct writes to MCE MSRs")
Link: https://sashiko.dev/#/patchset/20260612215729.1532175-1-jmattson%40google.com?part=2
Assisted-by: Gemini:gemini-3.5-pro
Signed-off-by: Jim Mattson <jmattson@xxxxxxxxxx>
---
arch/x86/include/asm/msr-index.h | 2 ++
arch/x86/kernel/cpu/mce/inject.c | 46 ++++++++++----------------------
2 files changed, 16 insertions(+), 32 deletions(-)

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 86554de9a3f5..29c4abade594 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -896,6 +896,8 @@
#define MSR_K7_HWCR 0xc0010015
#define MSR_K7_HWCR_SMMLOCK_BIT 0
#define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT)
+#define MSR_K7_HWCR_MCSTATUSWREN_BIT 18
+#define MSR_K7_HWCR_MCSTATUSWREN BIT_ULL(MSR_K7_HWCR_MCSTATUSWREN_BIT)
#define MSR_K7_HWCR_IRPERF_EN_BIT 30
#define MSR_K7_HWCR_IRPERF_EN BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT)
#define MSR_K7_HWCR_CPUID_USER_DIS_BIT 35
diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
index 6f8a49d8baeb..1e017d8e23e4 100644
--- a/arch/x86/kernel/cpu/mce/inject.c
+++ b/arch/x86/kernel/cpu/mce/inject.c
@@ -31,6 +31,7 @@
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/nmi.h>
+#include <asm/processor.h>
#include <asm/smp.h>

#include "internal.h"
@@ -311,30 +312,6 @@ static struct notifier_block inject_nb = {
.notifier_call = mce_inject_raise,
};

-/*
- * Caller needs to be make sure this cpu doesn't disappear
- * from under us, i.e.: get_cpu/put_cpu.
- */
-static int toggle_hw_mce_inject(unsigned int cpu, bool enable)
-{
- struct msr val;
- int err;
-
- err = rdmsrq_on_cpu(cpu, MSR_K7_HWCR, &val.q);
- if (err) {
- pr_err("%s: error reading HWCR\n", __func__);
- return err;
- }
-
- enable ? (val.l |= BIT(18)) : (val.l &= ~BIT(18));
-
- err = wrmsrq_on_cpu(cpu, MSR_K7_HWCR, val.q);
- if (err)
- pr_err("%s: error writing HWCR\n", __func__);
-
- return err;
-}
-
static int __set_inj(const char *buf)
{
int i;
@@ -500,6 +477,12 @@ static void prepare_msrs(void *info)
wrmsrq(MSR_IA32_MCx_MISC(b), m.misc);
}
}
+static void ipi_inject_mce(void *info)
+{
+ amd_update_hwcr(MSR_K7_HWCR_MCSTATUSWREN_BIT, true);
+ prepare_msrs(info);
+ amd_update_hwcr(MSR_K7_HWCR_MCSTATUSWREN_BIT, false);
+}

static void do_inject(void)
{
@@ -556,13 +539,13 @@ static void do_inject(void)
if (!cpu_online(cpu))
goto err;

- toggle_hw_mce_inject(cpu, true);
-
i_mce.mcgstatus = mcg_status;
i_mce.inject_flags = inj_type;
- smp_call_function_single(cpu, prepare_msrs, &i_mce, 0);

- toggle_hw_mce_inject(cpu, false);
+ if (smp_call_function_single(cpu, ipi_inject_mce, &i_mce, 1)) {
+ pr_err("%s: Error injecting MCE on CPU %d\n", __func__, cpu);
+ goto err;
+ }

switch (inj_type) {
case DFR_INT_INJ:
@@ -727,7 +710,6 @@ static void __init debugfs_init(void)

static void check_hw_inj_possible(void)
{
- int cpu;
u8 bank;

/*
@@ -737,7 +719,7 @@ static void check_hw_inj_possible(void)
if (!cpu_feature_enabled(X86_FEATURE_SMCA))
return;

- cpu = get_cpu();
+ get_cpu();

for (bank = 0; bank < MAX_NR_BANKS; ++bank) {
u64 status = MCI_STATUS_VAL, ipid;
@@ -747,7 +729,7 @@ static void check_hw_inj_possible(void)
if (!ipid)
continue;

- toggle_hw_mce_inject(cpu, true);
+ amd_update_hwcr(MSR_K7_HWCR_MCSTATUSWREN_BIT, true);

wrmsrq_safe(mca_msr_reg(bank, MCA_STATUS), status);
rdmsrq_safe(mca_msr_reg(bank, MCA_STATUS), &status);
@@ -759,7 +741,7 @@ static void check_hw_inj_possible(void)
"Try using APEI EINJ instead.\n");
}

- toggle_hw_mce_inject(cpu, false);
+ amd_update_hwcr(MSR_K7_HWCR_MCSTATUSWREN_BIT, false);

break;
}
--
2.55.0.rc0.799.gd6f94ed593-goog