[PATCH 7/9] x86/mce: Add mce=print_all option

From: Borislav Petkov
Date: Tue Apr 07 2020 - 12:34:31 EST


From: Tony Luck <tony.luck@xxxxxxxxx>

Sometimes, when logs are getting lost, it's nice to just
have everything dumped to the serial console.

Signed-off-by: Tony Luck <tony.luck@xxxxxxxxx>
Signed-off-by: Borislav Petkov <bp@xxxxxxx>
Link: https://lkml.kernel.org/r/20200214222720.13168-7-tony.luck@xxxxxxxxx
---
arch/x86/kernel/cpu/mce/core.c | 7 ++++++-
arch/x86/kernel/cpu/mce/internal.h | 1 +
2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index fc879b6669d5..4efe6c128887 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -591,7 +591,7 @@ static int mce_default_notifier(struct notifier_block *nb, unsigned long val,
if (!m)
return NOTIFY_DONE;

- if (!m->kflags)
+ if (mca_cfg.print_all || !m->kflags)
__print_mce(m);

return NOTIFY_DONE;
@@ -1962,6 +1962,7 @@ void mce_disable_bank(int bank)
* mce=no_cmci Disables CMCI
* mce=no_lmce Disables LMCE
* mce=dont_log_ce Clears corrected events silently, no log created for CEs.
+ * mce=print_all Print all machine check logs to console
* mce=ignore_ce Disables polling and CMCI, corrected events are not cleared.
* mce=TOLERANCELEVEL[,monarchtimeout] (number, see above)
* monarchtimeout is how long to wait for other CPUs on machine
@@ -1990,6 +1991,8 @@ static int __init mcheck_enable(char *str)
cfg->lmce_disabled = 1;
else if (!strcmp(str, "dont_log_ce"))
cfg->dont_log_ce = true;
+ else if (!strcmp(str, "print_all"))
+ cfg->print_all = true;
else if (!strcmp(str, "ignore_ce"))
cfg->ignore_ce = true;
else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
@@ -2256,6 +2259,7 @@ static ssize_t store_int_with_restart(struct device *s,
static DEVICE_INT_ATTR(tolerant, 0644, mca_cfg.tolerant);
static DEVICE_INT_ATTR(monarch_timeout, 0644, mca_cfg.monarch_timeout);
static DEVICE_BOOL_ATTR(dont_log_ce, 0644, mca_cfg.dont_log_ce);
+static DEVICE_BOOL_ATTR(print_all, 0644, mca_cfg.print_all);

static struct dev_ext_attribute dev_attr_check_interval = {
__ATTR(check_interval, 0644, device_show_int, store_int_with_restart),
@@ -2280,6 +2284,7 @@ static struct device_attribute *mce_device_attrs[] = {
#endif
&dev_attr_monarch_timeout.attr,
&dev_attr_dont_log_ce.attr,
+ &dev_attr_print_all.attr,
&dev_attr_ignore_ce.attr,
&dev_attr_cmci_disabled.attr,
NULL
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index 74a01829c4f4..55f5c7b755f2 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -119,6 +119,7 @@ struct mca_config {
bool dont_log_ce;
bool cmci_disabled;
bool ignore_ce;
+ bool print_all;

__u64 lmce_disabled : 1,
disabled : 1,
--
2.21.0