[PATCH v1 1/4] x86/mce: Collect error message for severities below MCE_PANIC_SEVERITY

From: Shuai Xue
Date: Tue Feb 11 2025 - 01:02:48 EST


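Currently, mce_no_way_out() only collects an error message when the error
severity is at least `MCE_PANIC_SEVERITY`. To improve diagnostics, also
collect the error message when the severity is less than
`MCE_PANIC_SEVERITY`: while scanning the banks, keep the message for the
most severe error seen so far, and still return true only once a bank
reaches `MCE_PANIC_SEVERITY`.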

Signed-off-by: Shuai Xue <xueshuai@xxxxxxxxxxxxxxxxx>
---
arch/x86/kernel/cpu/mce/core.c | 17 +++++++++++------
1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 0dc00c9894c7..2919a077cd66 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -925,11 +925,12 @@ static __always_inline void quirk_zen_ifu(int bank, struct mce *m, struct pt_reg
* Do a quick check if any of the events requires a panic.
* This decides if we keep the events around or clear them.
*/
-static __always_inline int mce_no_way_out(struct mce_hw_err *err, char **msg, unsigned long *validp,
- struct pt_regs *regs)
+static __always_inline bool mce_no_way_out(struct mce_hw_err *err, char **msg,
+ unsigned long *validp,
+ struct pt_regs *regs)
{
struct mce *m = &err->m;
- char *tmp = *msg;
+ char *tmp = *msg, cur_sev = MCE_NO_SEVERITY, sev;
int i;

for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
@@ -945,13 +946,17 @@ static __always_inline int mce_no_way_out(struct mce_hw_err *err, char **msg, un
quirk_zen_ifu(i, m, regs);

m->bank = i;
- if (mce_severity(m, regs, &tmp, true) >= MCE_PANIC_SEVERITY) {
+ sev = mce_severity(m, regs, &tmp, true);
+ if (sev >= cur_sev) {
mce_read_aux(err, i);
*msg = tmp;
- return 1;
+ cur_sev = sev;
}
+
+ if (cur_sev == MCE_PANIC_SEVERITY)
+ return true;
}
- return 0;
+ return false;
}

/*
--
2.39.3