[RFC PATCH 3/5] x86/mce: Introduce a function pointer mce_handle_storm

From: Smita Koralahalli
Date: Wed Apr 06 2022 - 06:06:27 EST


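Introduce a function pointer, "mce_handle_storm", which performs the
vendor-specific storm handling. On Intel it points to
mce_intel_handle_storm(), a routine that sets different thresholds in
IA32_MCi_CTL2: called with on == true it restores the bank's regular
threshold (cmci_threshold[bank]) once a storm has subsided, and with
on == false it applies CMCI_STORM_THRESHOLD when a storm is detected.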

No functional changes.

Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@xxxxxxx>
---
This patch is kept separate only to avoid modifying Tony's initial code
directly, which could get confusing. These changes could be merged into
Tony's new CMCI storm mitigation patch.
---
arch/x86/kernel/cpu/mce/core.c | 5 +++++
arch/x86/kernel/cpu/mce/intel.c | 12 ++++++++++--
arch/x86/kernel/cpu/mce/internal.h | 3 +++
3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 13844a38aa2c..db6d60825e77 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1599,6 +1599,10 @@ static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
static DEFINE_PER_CPU(struct timer_list, mce_timer);
static DEFINE_PER_CPU(bool, storm_poll_mode);

+void mce_handle_storm_default(int bank, bool on) { }
+
+void (*mce_handle_storm)(int bank, bool on) = mce_handle_storm_default;
+
static void __start_timer(struct timer_list *t, unsigned long interval)
{
unsigned long when = jiffies + interval;
@@ -1988,6 +1992,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
switch (c->x86_vendor) {
case X86_VENDOR_INTEL:
mce_intel_feature_init(c);
+ mce_handle_storm = mce_intel_handle_storm;
break;

case X86_VENDOR_AMD: {
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index 59cad4061e5a..7edc31742fe0 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -159,6 +159,14 @@ static void cmci_set_threshold(int bank, int thresh)
raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

+void mce_intel_handle_storm(int bank, bool on)
+{
+ if (on)
+ cmci_set_threshold(bank, cmci_threshold[bank]);
+ else
+ cmci_set_threshold(bank, CMCI_STORM_THRESHOLD);
+}
+
static void cmci_storm_begin(int bank)
{
__set_bit(bank, this_cpu_ptr(mce_poll_banks));
@@ -218,13 +226,13 @@ void track_cmci_storm(int bank, u64 status)
if (history & GENMASK_ULL(STORM_END_POLL_THRESHOLD - 1, 0))
return;
pr_notice("CPU%d BANK%d CMCI storm subsided\n", smp_processor_id(), bank);
- cmci_set_threshold(bank, cmci_threshold[bank]);
+ mce_handle_storm(bank, true);
cmci_storm_end(bank);
} else {
if (hweight64(history) < STORM_BEGIN_THRESHOLD)
return;
pr_notice("CPU%d BANK%d CMCI storm detected\n", smp_processor_id(), bank);
- cmci_set_threshold(bank, CMCI_STORM_THRESHOLD);
+ mce_handle_storm(bank, false);
cmci_storm_begin(bank);
}
}
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index 1ee8fc0d97fe..c95802db9535 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -42,6 +42,7 @@ extern mce_banks_t mce_banks_ce_disabled;

#ifdef CONFIG_X86_MCE_INTEL
void track_cmci_storm(int bank, u64 status);
+void mce_intel_handle_storm(int bank, bool on);
void cmci_disable_bank(int bank);
void intel_init_cmci(void);
void intel_init_lmce(void);
@@ -49,6 +50,7 @@ void intel_clear_lmce(void);
bool intel_filter_mce(struct mce *m);
#else
static inline void track_cmci_storm(int bank, u64 status) { }
+# define mce_intel_handle_storm mce_handle_storm_default
static inline void cmci_disable_bank(int bank) { }
static inline void intel_init_cmci(void) { }
static inline void intel_init_lmce(void) { }
@@ -57,6 +59,7 @@ static inline bool intel_filter_mce(struct mce *m) { return false; }
#endif

void mce_timer_kick(bool storm);
+extern void (*mce_handle_storm)(int bank, bool on);

#ifdef CONFIG_ACPI_APEI
int apei_write_mce(struct mce *m);
--
2.17.1