[PATCH v2 04/16] x86/mce/amd: Look up bank type by IPID
From: Yazen Ghannam
Date: Thu Apr 04 2024 - 11:15:13 EST
Scalable MCA systems use values within the MCA_IPID register to describe
a bank's type. Other information is not needed.
Currently, the bank types are cached during boot and this information is
used during boot and run time. The cached values are per-CPU and
per-bank. The boot path needs the cached values, but this should be
removed. The run time path does not need the cached values.
Determine a Scalable MCA bank's type using only the MCA_IPID values.
Keep old code until init path is cleaned up.
Signed-off-by: Yazen Ghannam <yazen.ghannam@xxxxxxx>
---
Notes:
Link:
https://lkml.kernel.org/r/20231118193248.1296798-9-yazen.ghannam@xxxxxxx
v1->v2:
* Include bitops started in dropped patches. (Yazen)
* Update all users of smca_get_bank_type(). (Yazen)
arch/x86/include/asm/mce.h | 4 +-
arch/x86/kernel/cpu/mce/amd.c | 99 ++++++++++++++++++++++---
drivers/edac/amd64_edac.c | 2 +-
drivers/edac/mce_amd.c | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 2 +-
5 files changed, 94 insertions(+), 15 deletions(-)
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index de3118305838..adad99bac567 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -59,8 +59,6 @@
* - TCC bit is present in MCx_STATUS.
*/
#define MCI_CONFIG_MCAX 0x1
-#define MCI_IPID_MCATYPE 0xFFFF0000
-#define MCI_IPID_HWID 0xFFF
/*
* Note that the full MCACOD field of IA32_MCi_STATUS MSR is
@@ -342,7 +340,7 @@ extern int mce_threshold_create_device(unsigned int cpu);
extern int mce_threshold_remove_device(unsigned int cpu);
void mce_amd_feature_init(struct cpuinfo_x86 *c);
-enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank);
+enum smca_bank_types smca_get_bank_type(u64 ipid);
#else
static inline int mce_threshold_create_device(unsigned int cpu) { return 0; };
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index bc78e751dfcc..c76bc158b6b6 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -7,6 +7,7 @@
*
* All MC4_MISCi registers are shared between cores on a node.
*/
+#include <linux/bitfield.h>
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/kobject.h>
@@ -51,6 +52,10 @@
#define DEF_INT_TYPE_APIC 0x2
/* Scalable MCA: */
+#define MCI_IPID_MCATYPE GENMASK_ULL(47, 44)
+#define MCI_IPID_HWID GENMASK_ULL(43, 32)
+#define MCI_IPID_MCATYPE_OLD 0xFFFF0000
+#define MCI_IPID_HWID_OLD 0xFFF
/* Threshold LVT offset is at MSR0xC0000410[15:12] */
#define SMCA_THR_LVT_OFF 0xF000
@@ -131,7 +136,7 @@ static const char *smca_get_name(enum smca_bank_types t)
return smca_names[t];
}
-enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank)
+static enum smca_bank_types smca_get_bank_type_old(unsigned int cpu, unsigned int bank)
{
struct smca_bank *b;
@@ -144,9 +149,8 @@ enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank)
return b->hwid->bank_type;
}
-EXPORT_SYMBOL_GPL(smca_get_bank_type);
-static const struct smca_hwid smca_hwid_mcatypes[] = {
+static const struct smca_hwid smca_hwid_mcatypes_old[] = {
/* { bank_type, hwid_mcatype } */
/* Reserved type */
@@ -210,6 +214,83 @@ static const struct smca_hwid smca_hwid_mcatypes[] = {
{ SMCA_GMI_PHY, HWID_MCATYPE(0x269, 0x0) },
};
+/* Keep sorted first by HWID then by McaType. */
+static const u32 smca_hwid_mcatypes[] = {
+ /* Reserved type */
+ [SMCA_RESERVED] = HWID_MCATYPE(0x00, 0x0),
+
+ /* System Management Unit MCA type */
+ [SMCA_SMU] = HWID_MCATYPE(0x01, 0x0),
+ [SMCA_SMU_V2] = HWID_MCATYPE(0x01, 0x1),
+
+ /* Microprocessor 5 Unit MCA type */
+ [SMCA_MP5] = HWID_MCATYPE(0x01, 0x2),
+
+ /* MPDMA MCA type */
+ [SMCA_MPDMA] = HWID_MCATYPE(0x01, 0x3),
+
+ /* Parameter Block MCA type */
+ [SMCA_PB] = HWID_MCATYPE(0x05, 0x0),
+
+ /* Northbridge IO Unit MCA type */
+ [SMCA_NBIO] = HWID_MCATYPE(0x18, 0x0),
+
+ /* Data Fabric MCA types */
+ [SMCA_CS] = HWID_MCATYPE(0x2E, 0x0),
+ [SMCA_PIE] = HWID_MCATYPE(0x2E, 0x1),
+ [SMCA_CS_V2] = HWID_MCATYPE(0x2E, 0x2),
+
+ /* PCI Express Unit MCA type */
+ [SMCA_PCIE] = HWID_MCATYPE(0x46, 0x0),
+ [SMCA_PCIE_V2] = HWID_MCATYPE(0x46, 0x1),
+
+ [SMCA_XGMI_PCS] = HWID_MCATYPE(0x50, 0x0),
+ [SMCA_NBIF] = HWID_MCATYPE(0x6C, 0x0),
+ [SMCA_SHUB] = HWID_MCATYPE(0x80, 0x0),
+
+ /* Unified Memory Controller MCA type */
+ [SMCA_UMC] = HWID_MCATYPE(0x96, 0x0),
+ [SMCA_UMC_V2] = HWID_MCATYPE(0x96, 0x1),
+
+ [SMCA_SATA] = HWID_MCATYPE(0xA8, 0x0),
+ [SMCA_USB] = HWID_MCATYPE(0xAA, 0x0),
+
+ /* ZN Core (HWID=0xB0) MCA types */
+ [SMCA_LS] = HWID_MCATYPE(0xB0, 0x0),
+ [SMCA_IF] = HWID_MCATYPE(0xB0, 0x1),
+ [SMCA_L2_CACHE] = HWID_MCATYPE(0xB0, 0x2),
+ [SMCA_DE] = HWID_MCATYPE(0xB0, 0x3),
+ /* HWID 0xB0 MCATYPE 0x4 is Reserved */
+ [SMCA_EX] = HWID_MCATYPE(0xB0, 0x5),
+ [SMCA_FP] = HWID_MCATYPE(0xB0, 0x6),
+ [SMCA_L3_CACHE] = HWID_MCATYPE(0xB0, 0x7),
+ [SMCA_LS_V2] = HWID_MCATYPE(0xB0, 0x10),
+
+ /* Platform Security Processor MCA type */
+ [SMCA_PSP] = HWID_MCATYPE(0xFF, 0x0),
+ [SMCA_PSP_V2] = HWID_MCATYPE(0xFF, 0x1),
+
+ [SMCA_GMI_PCS] = HWID_MCATYPE(0x241, 0x0),
+ [SMCA_XGMI_PHY] = HWID_MCATYPE(0x259, 0x0),
+ [SMCA_WAFL_PHY] = HWID_MCATYPE(0x267, 0x0),
+ [SMCA_GMI_PHY] = HWID_MCATYPE(0x269, 0x0),
+};
+
+enum smca_bank_types smca_get_bank_type(u64 ipid)
+{
+ enum smca_bank_types type;
+ u32 hwid_mcatype = HWID_MCATYPE(FIELD_GET(MCI_IPID_HWID, ipid),
+ FIELD_GET(MCI_IPID_MCATYPE, ipid));
+
+ for (type = 0; type < ARRAY_SIZE(smca_hwid_mcatypes); type++) {
+ if (hwid_mcatype == smca_hwid_mcatypes[type])
+ return type;
+ }
+
+ return N_SMCA_BANK_TYPES;
+}
+EXPORT_SYMBOL_GPL(smca_get_bank_type);
+
/*
* In SMCA enabled processors, we can have multiple banks for a given IP type.
* So to define a unique name for each bank, we use a temp c-string to append
@@ -310,11 +391,11 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
return;
}
- hwid_mcatype = HWID_MCATYPE(high & MCI_IPID_HWID,
- (high & MCI_IPID_MCATYPE) >> 16);
+ hwid_mcatype = HWID_MCATYPE(high & MCI_IPID_HWID_OLD,
+ (high & MCI_IPID_MCATYPE_OLD) >> 16);
- for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) {
- s_hwid = &smca_hwid_mcatypes[i];
+ for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes_old); i++) {
+ s_hwid = &smca_hwid_mcatypes_old[i];
if (hwid_mcatype == s_hwid->hwid_mcatype) {
this_cpu_ptr(smca_banks)[bank].hwid = s_hwid;
@@ -724,7 +805,7 @@ static bool smca_mce_is_memory_error(struct mce *m)
if (XEC(m->status, 0x3f))
return false;
- bank_type = smca_get_bank_type(m->extcpu, m->bank);
+ bank_type = smca_get_bank_type(m->ipid);
return bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2;
}
@@ -1097,7 +1178,7 @@ static const char *get_name(unsigned int cpu, unsigned int bank, struct threshol
return th_names[bank];
}
- bank_type = smca_get_bank_type(cpu, bank);
+ bank_type = smca_get_bank_type_old(cpu, bank);
if (bank_type >= N_SMCA_BANK_TYPES)
return NULL;
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 1f3520d76861..4b3764ea7c59 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1041,7 +1041,7 @@ static int fixup_node_id(int node_id, struct mce *m)
/* MCA_IPID[InstanceIdHi] give the AMD Node ID for the bank. */
u8 nid = (m->ipid >> 44) & 0xF;
- if (smca_get_bank_type(m->extcpu, m->bank) != SMCA_UMC_V2)
+ if (smca_get_bank_type(m->ipid) != SMCA_UMC_V2)
return node_id;
/* Nodes below the GPU base node are CPU nodes and don't need a fixup. */
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index 8130c3dc64da..e02af5da1ec2 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -731,7 +731,7 @@ static const char *smca_get_long_name(enum smca_bank_types t)
/* Decode errors according to Scalable MCA specification */
static void decode_smca_error(struct mce *m)
{
- enum smca_bank_types bank_type = smca_get_bank_type(m->extcpu, m->bank);
+ enum smca_bank_types bank_type = smca_get_bank_type(m->ipid);
u8 xec = XEC(m->status, xec_mask);
if (bank_type >= N_SMCA_BANK_TYPES)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 8ebab6f22e5a..c543600b759b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -3546,7 +3546,7 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb,
* and error occurred in DramECC (Extended error code = 0) then only
* process the error, else bail out.
*/
- if (!m || !((smca_get_bank_type(m->extcpu, m->bank) == SMCA_UMC_V2) &&
+ if (!m || !((smca_get_bank_type(m->ipid) == SMCA_UMC_V2) &&
(XEC(m->status, 0x3f) == 0x0)))
return NOTIFY_DONE;
--
2.34.1