[PATCH v5 2/2] x86/mce: Add support for Extended Physical Address MCA changes

From: Smita Koralahalli
Date: Tue Apr 12 2022 - 11:41:35 EST


Newer AMD CPUs support more physical address bits.

That is, the MCA_ADDR registers on Scalable MCA systems contain the
ErrorAddr in bits [56:0] instead of [55:0]. Hence, the existing LSB field
in bits [61:56] of MCA_ADDR must be moved to accommodate the larger
ErrorAddr size.

MCA_CONFIG[McaLsbInStatusSupported] indicates this change. If set, the
LSB field will be found in MCA_STATUS rather than MCA_ADDR.

Each logical CPU has its own MCA banks in hardware, which are not shared
with other logical CPUs. Additionally, on SMCA systems, each feature bit
may be different for each bank within the same logical CPU.

Hence, check MCA_CONFIG[McaLsbInStatusSupported] for each MCA bank on
each CPU.

Additionally, not all MCA banks support the maximum number of ErrorAddr
bits in MCA_ADDR. Some banks might support fewer bits, in which case the
remaining bits are marked as reserved.

Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@xxxxxxx>
Reviewed-by: Yazen Ghannam <yazen.ghannam@xxxxxxx>
Link: https://lkml.kernel.org/r/20220225193342.215780-4-Smita.KoralahalliChannabasappa@xxxxxxx
---
v2:
Declared lsb_in_status in existing mce_bank[] struct.
Moved struct mce_bank[] declaration from core.c -> internal.h
v3:
Rebased on the latest tip tree. No functional changes.
v4:
No change.
v5:
Extend the comment for smca_extract_err_addr() to cover the case where
AddrLsb is found in the MCA_STATUS register.
---
arch/x86/kernel/cpu/mce/amd.c | 11 ++++++++++
arch/x86/kernel/cpu/mce/core.c | 11 ++++------
arch/x86/kernel/cpu/mce/internal.h | 32 +++++++++++++++++++++++++++---
3 files changed, 44 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index f809eacac523..4f2744324d9b 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -722,6 +722,17 @@ bool amd_mce_is_memory_error(struct mce *m)
return m->bank == 4 && xec == 0x8;
}

+void smca_feature_init(void)
+{
+ unsigned int bank;
+ u64 mca_cfg;
+
+ for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
+ rdmsrl(MSR_AMD64_SMCA_MCx_CONFIG(bank), mca_cfg);
+ this_cpu_ptr(mce_banks_array)[bank].lsb_in_status = !!(mca_cfg & BIT(8));
+ }
+}
+
static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
{
struct mce m;
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 39614c19da25..99e3ff9607a3 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -67,13 +67,7 @@ DEFINE_PER_CPU(unsigned, mce_exception_count);

DEFINE_PER_CPU_READ_MOSTLY(unsigned int, mce_num_banks);

-struct mce_bank {
- u64 ctl; /* subevents to enable */
-
- __u64 init : 1, /* initialise bank? */
- __reserved_1 : 63;
-};
-static DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array);
+DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array);

#define ATTR_LEN 16
/* One object for each MCE bank, shared by all CPUs */
@@ -1935,6 +1929,9 @@ static void __mcheck_cpu_init_early(struct cpuinfo_x86 *c)
mce_flags.succor = !!cpu_has(c, X86_FEATURE_SUCCOR);
mce_flags.smca = !!cpu_has(c, X86_FEATURE_SMCA);
mce_flags.amd_threshold = 1;
+
+ if (mce_flags.smca)
+ smca_feature_init();
}
}

diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index 64dbae6b8a09..0f4934fb3d93 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -177,6 +177,22 @@ struct mce_vendor_flags {

extern struct mce_vendor_flags mce_flags;

+struct mce_bank {
+ u64 ctl; /* subevents to enable */
+
+ __u64 init : 1, /* initialise bank? */
+
+ /*
+ * (AMD) MCA_CONFIG[McaLsbInStatusSupported]: This bit indicates
+ * the LSB field is found in MCA_STATUS, when set.
+ */
+ lsb_in_status : 1,
+
+ __reserved_1 : 62;
+};
+
+DECLARE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array);
+
enum mca_msr {
MCA_CTL,
MCA_STATUS,
@@ -190,7 +206,9 @@ extern bool filter_mce(struct mce *m);
#ifdef CONFIG_X86_MCE_AMD
extern bool amd_filter_mce(struct mce *m);

-/* Extract [55:<lsb>] where lsb is the LS-*valid* bit of the address bits. */
+/* If MCA_CONFIG[McaLsbInStatusSupported] is set, extract ErrorAddr from bits
+ * [56:<lsb>] of MCA_ADDR (lsb is in MCA_STATUS), else from bits [55:<lsb>].
+ */
static __always_inline void smca_extract_err_addr(struct mce *m)
{
u8 lsb;
@@ -198,14 +216,22 @@ static __always_inline void smca_extract_err_addr(struct mce *m)
if (!mce_flags.smca)
return;

- lsb = (m->addr >> 56) & 0x3f;
+ if (this_cpu_ptr(mce_banks_array)[m->bank].lsb_in_status) {
+ lsb = (m->status >> 24) & 0x3f;

- m->addr &= GENMASK_ULL(55, lsb);
+ m->addr &= GENMASK_ULL(56, lsb);
+ } else {
+ lsb = (m->addr >> 56) & 0x3f;
+
+ m->addr &= GENMASK_ULL(55, lsb);
+ }
}

+void smca_feature_init(void);
#else
static inline bool amd_filter_mce(struct mce *m) { return false; }
static inline void smca_extract_err_addr(struct mce *m) { }
+static inline void smca_feature_init(void) { }
#endif

#ifdef CONFIG_X86_ANCIENT_MCE
--
2.17.1