[PATCH v2 09/16] x86/mce: Unify AMD THR handler with MCA Polling

From: Yazen Ghannam
Date: Thu Apr 04 2024 - 11:17:08 EST


AMD systems optionally support an MCA Thresholding interrupt. The
interrupt should be used as another signal to trigger MCA polling. This
is similar to how the Intel Corrected Machine Check interrupt (CMCI) is
handled.

AMD MCA Thresholding is managed using the MCA_MISC registers within an
MCA bank. The OS will need to modify the hardware error count field in
order to reset the threshold limit and rearm the interrupt. Management
of the MCA_MISC register should be done as a follow-up to the basic MCA
polling flow. It should not be the main focus of the interrupt handler.

Furthermore, future systems will have the ability to send an MCA
Thresholding interrupt to the OS even when the OS does not manage the
feature, i.e. when the MCA_MISC registers are Read-as-Zero/Locked.

Call the common MCA polling function when handling the MCA Thresholding
interrupt. This will allow the OS to find any valid errors whether or
not the MCA Thresholding feature is OS-managed. Also, this allows the
common MCA polling options and kernel parameters to apply to AMD
systems.

Add a callback to the MCA polling function to handle vendor-specific
operations. Start by handling the AMD MCA Thresholding "block reset"
flow.

Signed-off-by: Yazen Ghannam <yazen.ghannam@xxxxxxx>
---

Notes:
Link: https://lkml.kernel.org/r/20231118193248.1296798-14-yazen.ghannam@xxxxxxx

v1->v2:
* No change.

arch/x86/kernel/cpu/mce/amd.c | 57 ++++++++++++++----------------
arch/x86/kernel/cpu/mce/core.c | 8 +++++
arch/x86/kernel/cpu/mce/internal.h | 2 ++
3 files changed, 37 insertions(+), 30 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index f59f4a1c9b21..75195d6fe971 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -979,12 +979,7 @@ static void amd_deferred_error_interrupt(void)
log_error_deferred(bank);
}

-static void log_error_thresholding(unsigned int bank, u64 misc)
-{
- _log_error_deferred(bank, misc);
-}
-
-static void log_and_reset_block(struct threshold_block *block)
+static void reset_block(struct threshold_block *block)
{
struct thresh_restart tr;
u32 low = 0, high = 0;
@@ -998,49 +993,51 @@ static void log_and_reset_block(struct threshold_block *block)
if (!(high & MASK_OVERFLOW_HI))
return;

- /* Log the MCE which caused the threshold event. */
- log_error_thresholding(block->bank, ((u64)high << 32) | low);
-
/* Reset threshold block after logging error. */
memset(&tr, 0, sizeof(tr));
tr.b = block;
threshold_restart_bank(&tr);
}

-/*
- * Threshold interrupt handler will service THRESHOLD_APIC_VECTOR. The interrupt
- * goes off when error_count reaches threshold_limit.
- */
-static void amd_threshold_interrupt(void)
+static void reset_thr_blocks(unsigned int bank)
{
struct threshold_block *first_block = NULL, *block = NULL, *tmp = NULL;
struct threshold_bank **bp = this_cpu_read(threshold_banks);
- unsigned int bank, cpu = smp_processor_id();

/*
* Validate that the threshold bank has been initialized already. The
* handler is installed at boot time, but on a hotplug event the
* interrupt might fire before the data has been initialized.
*/
- if (!bp)
+ if (!bp || !bp[bank])
return;

- for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
- if (!(per_cpu(bank_map, cpu) & BIT_ULL(bank)))
- continue;
+ first_block = bp[bank]->blocks;
+ if (!first_block)
+ return;

- first_block = bp[bank]->blocks;
- if (!first_block)
- continue;
+ /*
+ * The first block is also the head of the list. Check it first
+ * before iterating over the rest.
+ */
+ reset_block(first_block);
+ list_for_each_entry_safe(block, tmp, &first_block->miscj, miscj)
+ reset_block(block);
+}

- /*
- * The first block is also the head of the list. Check it first
- * before iterating over the rest.
- */
- log_and_reset_block(first_block);
- list_for_each_entry_safe(block, tmp, &first_block->miscj, miscj)
- log_and_reset_block(block);
- }
+/*
+ * Threshold interrupt handler will service THRESHOLD_APIC_VECTOR. The interrupt
+ * goes off when error_count reaches threshold_limit.
+ */
+static void amd_threshold_interrupt(void)
+{
+ /* Check all banks for now. This could be optimized in the future. */
+ machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_poll_banks));
+}
+
+void amd_handle_error(struct mce *m)
+{
+ reset_thr_blocks(m->bank);
}

/*
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 7a857b33f515..75297e7eb980 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -672,6 +672,12 @@ static noinstr void mce_read_aux(struct mce *m, int i)
}
}

+static void vendor_handle_error(struct mce *m)
+{
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ return amd_handle_error(m);
+}
+
DEFINE_PER_CPU(unsigned, mce_poll_count);

/*
@@ -787,6 +793,8 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
mce_log(&m);

clear_it:
+ vendor_handle_error(&m);
+
/*
* Clear state for this bank.
*/
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index e86e53695828..96b108175ca2 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -267,6 +267,7 @@ void mce_setup_for_cpu(unsigned int cpu, struct mce *m);
#ifdef CONFIG_X86_MCE_AMD
extern bool amd_filter_mce(struct mce *m);
bool amd_mce_usable_address(struct mce *m);
+void amd_handle_error(struct mce *m);

/*
* If MCA_CONFIG[McaLsbInStatusSupported] is set, extract ErrAddr in bits
@@ -295,6 +296,7 @@ static __always_inline void smca_extract_err_addr(struct mce *m)
#else
static inline bool amd_filter_mce(struct mce *m) { return false; }
static inline bool amd_mce_usable_address(struct mce *m) { return false; }
+static inline void amd_handle_error(struct mce *m) { }
static inline void smca_extract_err_addr(struct mce *m) { }
#endif

--
2.34.1