[GIT PULL] RAS changes for v4.9
From: Ingo Molnar
Date: Mon Oct 03 2016 - 04:32:05 EST
Linus,
Please pull the latest ras-core-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git ras-core-for-linus
# HEAD: b199ac6c4943aa0db246163bf6b483e2bb53431b x86/RAS/mce_amd_inj: Remove debugfs dir recursively on exit
The main changes were:
- Lots of enhancements for AMD SMCA (Scalable MCA features/extensions) systems:
extract, decode and print more hardware error information and add matching
support on the injection/testing side as well. (Yazn Ghannam)
- Various MCE handling improvements on modern Intel Xeons. (Tony Luck)
- Plus misc fixes and enhancements.
Thanks,
Ingo
------------------>
Borislav Petkov (2):
x86/RAS/mce_amd_inj: Fix some W= warnings
x86/RAS/mce_amd_inj: Remove debugfs dir recursively on exit
Colin Ian King (1):
x86/RAS/mce_amd_inj: Fix signed wrap around when decrementing index 'i'
Tony Luck (4):
locking/static_keys: Provide DECLARE and well as DEFINE macros
x86/mce: Add PCI quirks to identify Xeons with machine check recovery
x86/mce: Improve memcpy_mcsafe()
x86/mce: Drop X86_FEATURE_MCE_RECOVERY and the related model string test
Yazen Ghannam (14):
x86/mce/AMD: Use msr_ops.misc() in allocate_threshold_blocks()
x86/mce: Add support for new MCA_SYND register
EDAC/mce_amd: Print syndrome register value on SMCA systems
x86/RAS: Add syndrome support to mce_amd_inj
x86/mce/AMD: Read MSRs on the CPU allocating the threshold blocks
EDAC/mce_amd: Add missing SMCA error descriptions
EDAC/mce_amd: Use SMCA prefix for error descriptions arrays
x86/mce/AMD, EDAC/mce_amd: Define and use tables for known SMCA IP types
x86/mce/AMD: Update sysfs bank names for SMCA systems
x86/mce/AMD: Ensure the deferred error interrupt is of type APIC on SMCA systems
x86/mce/AMD: Save MCA_IPID in MCE struct on SMCA systems
x86/mce, EDAC/mce_amd: Print MCA_SYND and MCA_IPID during MCE on SMCA systems
x86/mce/AMD: Extract the error address on SMCA systems
x86/MCE/AMD, EDAC: Handle reserved bank 4 on Fam17h properly
arch/x86/include/asm/cpufeatures.h | 1 -
arch/x86/include/asm/mce.h | 66 +++++-----
arch/x86/include/asm/pmem.h | 5 +-
arch/x86/include/asm/string_64.h | 19 ++-
arch/x86/include/uapi/asm/mce.h | 2 +
arch/x86/kernel/cpu/mcheck/mce.c | 44 +++++--
arch/x86/kernel/cpu/mcheck/mce_amd.c | 204 +++++++++++++++++++++++------
arch/x86/kernel/quirks.c | 31 +++++
arch/x86/kernel/x8664_ksyms_64.c | 2 +-
arch/x86/lib/memcpy_64.S | 6 +-
arch/x86/ras/mce_amd_inj.c | 54 ++++----
drivers/edac/mce_amd.c | 244 ++++++++++-------------------------
include/linux/jump_label.h | 6 +
include/trace/events/mce.h | 9 +-
14 files changed, 407 insertions(+), 286 deletions(-)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 92a8308b96f6..1188bc849ee3 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -106,7 +106,6 @@
#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
#define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
-#define X86_FEATURE_MCE_RECOVERY ( 3*32+31) /* cpu has recoverable machine checks */
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 8bf766ef0e18..9bd7ff5ffbcc 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -40,9 +40,10 @@
#define MCI_STATUS_AR (1ULL<<55) /* Action required */
/* AMD-specific bits */
+#define MCI_STATUS_TCC (1ULL<<55) /* Task context corrupt */
+#define MCI_STATUS_SYNDV (1ULL<<53) /* synd reg. valid */
#define MCI_STATUS_DEFERRED (1ULL<<44) /* uncorrected error, deferred exception */
#define MCI_STATUS_POISON (1ULL<<43) /* access poisonous data */
-#define MCI_STATUS_TCC (1ULL<<55) /* Task context corrupt */
/*
* McaX field if set indicates a given bank supports MCA extensions:
@@ -110,6 +111,7 @@
#define MSR_AMD64_SMCA_MC0_MISC0 0xc0002003
#define MSR_AMD64_SMCA_MC0_CONFIG 0xc0002004
#define MSR_AMD64_SMCA_MC0_IPID 0xc0002005
+#define MSR_AMD64_SMCA_MC0_SYND 0xc0002006
#define MSR_AMD64_SMCA_MC0_DESTAT 0xc0002008
#define MSR_AMD64_SMCA_MC0_DEADDR 0xc0002009
#define MSR_AMD64_SMCA_MC0_MISC1 0xc000200a
@@ -119,6 +121,7 @@
#define MSR_AMD64_SMCA_MCx_MISC(x) (MSR_AMD64_SMCA_MC0_MISC0 + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_CONFIG(x) (MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_IPID(x) (MSR_AMD64_SMCA_MC0_IPID + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_SYND(x) (MSR_AMD64_SMCA_MC0_SYND + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_DESTAT(x) (MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_DEADDR(x) (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x)))
@@ -334,44 +337,47 @@ extern void apei_mce_report_mem_error(int corrected,
* Scalable MCA.
*/
#ifdef CONFIG_X86_MCE_AMD
-enum amd_ip_types {
- SMCA_F17H_CORE = 0, /* Core errors */
- SMCA_DF, /* Data Fabric */
- SMCA_UMC, /* Unified Memory Controller */
- SMCA_PB, /* Parameter Block */
- SMCA_PSP, /* Platform Security Processor */
- SMCA_SMU, /* System Management Unit */
- N_AMD_IP_TYPES
-};
-
-struct amd_hwid {
- const char *name;
- unsigned int hwid;
-};
-
-extern struct amd_hwid amd_hwids[N_AMD_IP_TYPES];
-enum amd_core_mca_blocks {
+/* These may be used by multiple smca_hwid_mcatypes */
+enum smca_bank_types {
SMCA_LS = 0, /* Load Store */
SMCA_IF, /* Instruction Fetch */
- SMCA_L2_CACHE, /* L2 cache */
- SMCA_DE, /* Decoder unit */
- RES, /* Reserved */
- SMCA_EX, /* Execution unit */
+ SMCA_L2_CACHE, /* L2 Cache */
+ SMCA_DE, /* Decoder Unit */
+ SMCA_EX, /* Execution Unit */
SMCA_FP, /* Floating Point */
- SMCA_L3_CACHE, /* L3 cache */
- N_CORE_MCA_BLOCKS
+ SMCA_L3_CACHE, /* L3 Cache */
+ SMCA_CS, /* Coherent Slave */
+ SMCA_PIE, /* Power, Interrupts, etc. */
+ SMCA_UMC, /* Unified Memory Controller */
+ SMCA_PB, /* Parameter Block */
+ SMCA_PSP, /* Platform Security Processor */
+ SMCA_SMU, /* System Management Unit */
+ N_SMCA_BANK_TYPES
};
-extern const char * const amd_core_mcablock_names[N_CORE_MCA_BLOCKS];
+struct smca_bank_name {
+ const char *name; /* Short name for sysfs */
+ const char *long_name; /* Long name for pretty-printing */
+};
+
+extern struct smca_bank_name smca_bank_names[N_SMCA_BANK_TYPES];
+
+#define HWID_MCATYPE(hwid, mcatype) ((hwid << 16) | mcatype)
-enum amd_df_mca_blocks {
- SMCA_CS = 0, /* Coherent Slave */
- SMCA_PIE, /* Power management, Interrupts, etc */
- N_DF_BLOCKS
+struct smca_hwid_mcatype {
+ unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */
+ u32 hwid_mcatype; /* (hwid,mcatype) tuple */
+ u32 xec_bitmap; /* Bitmap of valid ExtErrorCodes; current max is 21. */
};
-extern const char * const amd_df_mcablock_names[N_DF_BLOCKS];
+struct smca_bank_info {
+ struct smca_hwid_mcatype *type;
+ u32 type_instance;
+};
+
+extern struct smca_bank_info smca_banks[MAX_NR_BANKS];
+
#endif
#endif /* _ASM_X86_MCE_H */
diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
index 643eba42d620..2c1ebeb4d737 100644
--- a/arch/x86/include/asm/pmem.h
+++ b/arch/x86/include/asm/pmem.h
@@ -46,10 +46,7 @@ static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n)
{
- if (static_cpu_has(X86_FEATURE_MCE_RECOVERY))
- return memcpy_mcsafe(dst, src, n);
- memcpy(dst, src, n);
- return 0;
+ return memcpy_mcsafe(dst, src, n);
}
/**
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 90dbbd9666d4..a164862d77e3 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -2,6 +2,7 @@
#define _ASM_X86_STRING_64_H
#ifdef __KERNEL__
+#include <linux/jump_label.h>
/* Written 2002 by Andi Kleen */
@@ -78,6 +79,9 @@ int strcmp(const char *cs, const char *ct);
#define memset(s, c, n) __memset(s, c, n)
#endif
+__must_check int memcpy_mcsafe_unrolled(void *dst, const void *src, size_t cnt);
+DECLARE_STATIC_KEY_FALSE(mcsafe_key);
+
/**
* memcpy_mcsafe - copy memory with indication if a machine check happened
*
@@ -86,10 +90,23 @@ int strcmp(const char *cs, const char *ct);
* @cnt: number of bytes to copy
*
* Low level memory copy function that catches machine checks
+ * We only call into the "safe" function on systems that can
+ * actually do machine check recovery. Everyone else can just
+ * use memcpy().
*
* Return 0 for success, -EFAULT for fail
*/
-int memcpy_mcsafe(void *dst, const void *src, size_t cnt);
+static __always_inline __must_check int
+memcpy_mcsafe(void *dst, const void *src, size_t cnt)
+{
+#ifdef CONFIG_X86_MCE
+ if (static_branch_unlikely(&mcsafe_key))
+ return memcpy_mcsafe_unrolled(dst, src, cnt);
+ else
+#endif
+ memcpy(dst, src, cnt);
+ return 0;
+}
#endif /* __KERNEL__ */
diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h
index 2184943341bf..69a6e07e3149 100644
--- a/arch/x86/include/uapi/asm/mce.h
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -26,6 +26,8 @@ struct mce {
__u32 socketid; /* CPU socket ID */
__u32 apicid; /* CPU initial apic ID */
__u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */
+ __u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */
+ __u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */
};
#define MCE_GET_RECORD_LEN _IOR('M', 1, int)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 79d8ec849468..a7fdf453d895 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -41,6 +41,7 @@
#include <linux/debugfs.h>
#include <linux/irq_work.h>
#include <linux/export.h>
+#include <linux/jump_label.h>
#include <asm/processor.h>
#include <asm/traps.h>
@@ -292,6 +293,13 @@ static void print_mce(struct mce *m)
if (m->misc)
pr_cont("MISC %llx ", m->misc);
+ if (mce_flags.smca) {
+ if (m->synd)
+ pr_cont("SYND %llx ", m->synd);
+ if (m->ipid)
+ pr_cont("IPID %llx ", m->ipid);
+ }
+
pr_cont("\n");
/*
* Note this output is parsed by external tools and old fields
@@ -568,6 +576,7 @@ static void mce_read_aux(struct mce *m, int i)
{
if (m->status & MCI_STATUS_MISCV)
m->misc = mce_rdmsrl(msr_ops.misc(i));
+
if (m->status & MCI_STATUS_ADDRV) {
m->addr = mce_rdmsrl(msr_ops.addr(i));
@@ -579,6 +588,23 @@ static void mce_read_aux(struct mce *m, int i)
m->addr >>= shift;
m->addr <<= shift;
}
+
+ /*
+ * Extract [55:<lsb>] where lsb is the least significant
+ * *valid* bit of the address bits.
+ */
+ if (mce_flags.smca) {
+ u8 lsb = (m->addr >> 56) & 0x3f;
+
+ m->addr &= GENMASK_ULL(55, lsb);
+ }
+ }
+
+ if (mce_flags.smca) {
+ m->ipid = mce_rdmsrl(MSR_AMD64_SMCA_MCx_IPID(i));
+
+ if (m->status & MCI_STATUS_SYNDV)
+ m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i));
}
}
@@ -1633,17 +1659,6 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
if (c->x86 == 6 && c->x86_model == 45)
quirk_no_way_out = quirk_sandybridge_ifu;
- /*
- * MCG_CAP.MCG_SER_P is necessary but not sufficient to know
- * whether this processor will actually generate recoverable
- * machine checks. Check to see if this is an E7 model Xeon.
- * We can't do a model number check because E5 and E7 use the
- * same model number. E5 doesn't support recovery, E7 does.
- */
- if (mca_cfg.recovery || (mca_cfg.ser &&
- !strncmp(c->x86_model_id,
- "Intel(R) Xeon(R) CPU E7-", 24)))
- set_cpu_cap(c, X86_FEATURE_MCE_RECOVERY);
}
if (cfg->monarch_timeout < 0)
cfg->monarch_timeout = 0;
@@ -2080,6 +2095,7 @@ void mce_disable_bank(int bank)
* mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
* mce=nobootlog Don't log MCEs from before booting.
* mce=bios_cmci_threshold Don't program the CMCI threshold
+ * mce=recovery force enable memcpy_mcsafe()
*/
static int __init mcheck_enable(char *str)
{
@@ -2676,8 +2692,14 @@ static int __init mcheck_debugfs_init(void)
static int __init mcheck_debugfs_init(void) { return -EINVAL; }
#endif
+DEFINE_STATIC_KEY_FALSE(mcsafe_key);
+EXPORT_SYMBOL_GPL(mcsafe_key);
+
static int __init mcheck_late_init(void)
{
+ if (mca_cfg.recovery)
+ static_branch_inc(&mcsafe_key);
+
mcheck_debugfs_init();
/*
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 7b7f3be783d4..9b5403462936 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -20,6 +20,7 @@
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/smp.h>
+#include <linux/string.h>
#include <asm/amd_nb.h>
#include <asm/apic.h>
@@ -63,34 +64,71 @@ static const char * const th_names[] = {
"execution_unit",
};
-/* Define HWID to IP type mappings for Scalable MCA */
-struct amd_hwid amd_hwids[] = {
- [SMCA_F17H_CORE] = { "f17h_core", 0xB0 },
- [SMCA_DF] = { "data_fabric", 0x2E },
- [SMCA_UMC] = { "umc", 0x96 },
- [SMCA_PB] = { "param_block", 0x5 },
- [SMCA_PSP] = { "psp", 0xFF },
- [SMCA_SMU] = { "smu", 0x1 },
+static const char * const smca_umc_block_names[] = {
+ "dram_ecc",
+ "misc_umc"
};
-EXPORT_SYMBOL_GPL(amd_hwids);
-
-const char * const amd_core_mcablock_names[] = {
- [SMCA_LS] = "load_store",
- [SMCA_IF] = "insn_fetch",
- [SMCA_L2_CACHE] = "l2_cache",
- [SMCA_DE] = "decode_unit",
- [RES] = "",
- [SMCA_EX] = "execution_unit",
- [SMCA_FP] = "floating_point",
- [SMCA_L3_CACHE] = "l3_cache",
+
+struct smca_bank_name smca_bank_names[] = {
+ [SMCA_LS] = { "load_store", "Load Store Unit" },
+ [SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" },
+ [SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" },
+ [SMCA_DE] = { "decode_unit", "Decode Unit" },
+ [SMCA_EX] = { "execution_unit", "Execution Unit" },
+ [SMCA_FP] = { "floating_point", "Floating Point Unit" },
+ [SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" },
+ [SMCA_CS] = { "coherent_slave", "Coherent Slave" },
+ [SMCA_PIE] = { "pie", "Power, Interrupts, etc." },
+ [SMCA_UMC] = { "umc", "Unified Memory Controller" },
+ [SMCA_PB] = { "param_block", "Parameter Block" },
+ [SMCA_PSP] = { "psp", "Platform Security Processor" },
+ [SMCA_SMU] = { "smu", "System Management Unit" },
};
-EXPORT_SYMBOL_GPL(amd_core_mcablock_names);
+EXPORT_SYMBOL_GPL(smca_bank_names);
+
+static struct smca_hwid_mcatype smca_hwid_mcatypes[] = {
+ /* { bank_type, hwid_mcatype, xec_bitmap } */
+
+ /* ZN Core (HWID=0xB0) MCA types */
+ { SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFEF },
+ { SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
+ { SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF },
+ { SMCA_DE, HWID_MCATYPE(0xB0, 0x3), 0x1FF },
+ /* HWID 0xB0 MCATYPE 0x4 is Reserved */
+ { SMCA_EX, HWID_MCATYPE(0xB0, 0x5), 0x7FF },
+ { SMCA_FP, HWID_MCATYPE(0xB0, 0x6), 0x7F },
+ { SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7), 0xFF },
+
+ /* Data Fabric MCA types */
+ { SMCA_CS, HWID_MCATYPE(0x2E, 0x0), 0x1FF },
+ { SMCA_PIE, HWID_MCATYPE(0x2E, 0x1), 0xF },
+
+ /* Unified Memory Controller MCA type */
+ { SMCA_UMC, HWID_MCATYPE(0x96, 0x0), 0x3F },
+
+ /* Parameter Block MCA type */
+ { SMCA_PB, HWID_MCATYPE(0x05, 0x0), 0x1 },
+
+ /* Platform Security Processor MCA type */
+ { SMCA_PSP, HWID_MCATYPE(0xFF, 0x0), 0x1 },
-const char * const amd_df_mcablock_names[] = {
- [SMCA_CS] = "coherent_slave",
- [SMCA_PIE] = "pie",
+ /* System Management Unit MCA type */
+ { SMCA_SMU, HWID_MCATYPE(0x01, 0x0), 0x1 },
};
-EXPORT_SYMBOL_GPL(amd_df_mcablock_names);
+
+struct smca_bank_info smca_banks[MAX_NR_BANKS];
+EXPORT_SYMBOL_GPL(smca_banks);
+
+/*
+ * In SMCA enabled processors, we can have multiple banks for a given IP type.
+ * So to define a unique name for each bank, we use a temp c-string to append
+ * the MCA_IPID[InstanceId] to type's name in get_name().
+ *
+ * InstanceId is 32 bits which is 8 characters. Make sure MAX_MCATYPE_NAME_LEN
+ * is greater than 8 plus 1 (for underscore) plus length of longest type name.
+ */
+#define MAX_MCATYPE_NAME_LEN 30
+static char buf_mcatype[MAX_MCATYPE_NAME_LEN];
static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
static DEFINE_PER_CPU(unsigned int, bank_map); /* see which banks are on */
@@ -108,6 +146,36 @@ void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;
* CPU Initialization
*/
+static void get_smca_bank_info(unsigned int bank)
+{
+ unsigned int i, hwid_mcatype, cpu = smp_processor_id();
+ struct smca_hwid_mcatype *type;
+ u32 high, instanceId;
+ u16 hwid, mcatype;
+
+ /* Collect bank_info using CPU 0 for now. */
+ if (cpu)
+ return;
+
+ if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_IPID(bank), &instanceId, &high)) {
+ pr_warn("Failed to read MCA_IPID for bank %d\n", bank);
+ return;
+ }
+
+ hwid = high & MCI_IPID_HWID;
+ mcatype = (high & MCI_IPID_MCATYPE) >> 16;
+ hwid_mcatype = HWID_MCATYPE(hwid, mcatype);
+
+ for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) {
+ type = &smca_hwid_mcatypes[i];
+ if (hwid_mcatype == type->hwid_mcatype) {
+ smca_banks[bank].type = type;
+ smca_banks[bank].type_instance = instanceId;
+ break;
+ }
+ }
+}
+
struct thresh_restart {
struct threshold_block *b;
int reset;
@@ -293,7 +361,7 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
wrmsr(MSR_CU_DEF_ERR, low, high);
}
-static u32 get_block_address(u32 current_addr, u32 low, u32 high,
+static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 high,
unsigned int bank, unsigned int block)
{
u32 addr = 0, offset = 0;
@@ -309,13 +377,13 @@ static u32 get_block_address(u32 current_addr, u32 low, u32 high,
*/
u32 low, high;
- if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
+ if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
return addr;
if (!(low & MCI_CONFIG_MCAX))
return addr;
- if (!rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
+ if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
(low & MASK_BLKPTR_LO))
addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
}
@@ -395,6 +463,20 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
*/
smca_high &= ~BIT(2);
+ /*
+ * SMCA sets the Deferred Error Interrupt type per bank.
+ *
+ * MCA_CONFIG[DeferredIntTypeSupported] is bit 5, and tells us
+ * if the DeferredIntType bit field is available.
+ *
+ * MCA_CONFIG[DeferredIntType] is bits [38:37] ([6:5] in the
+ * high portion of the MSR). OS should set this to 0x1 to enable
+ * APIC based interrupt. First, check that no interrupt has been
+ * set.
+ */
+ if ((smca_low & BIT(5)) && !((smca_high >> 5) & 0x3))
+ smca_high |= BIT(5);
+
wrmsr(smca_addr, smca_low, smca_high);
}
@@ -421,12 +503,15 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
void mce_amd_feature_init(struct cpuinfo_x86 *c)
{
u32 low = 0, high = 0, address = 0;
- unsigned int bank, block;
+ unsigned int bank, block, cpu = smp_processor_id();
int offset = -1;
for (bank = 0; bank < mca_cfg.banks; ++bank) {
+ if (mce_flags.smca)
+ get_smca_bank_info(bank);
+
for (block = 0; block < NR_BLOCKS; ++block) {
- address = get_block_address(address, low, high, bank, block);
+ address = get_block_address(cpu, address, low, high, bank, block);
if (!address)
break;
@@ -476,9 +561,27 @@ __log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
if (threshold_err)
m.misc = misc;
- if (m.status & MCI_STATUS_ADDRV)
+ if (m.status & MCI_STATUS_ADDRV) {
rdmsrl(msr_addr, m.addr);
+ /*
+ * Extract [55:<lsb>] where lsb is the least significant
+ * *valid* bit of the address bits.
+ */
+ if (mce_flags.smca) {
+ u8 lsb = (m.addr >> 56) & 0x3f;
+
+ m.addr &= GENMASK_ULL(55, lsb);
+ }
+ }
+
+ if (mce_flags.smca) {
+ rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m.ipid);
+
+ if (m.status & MCI_STATUS_SYNDV)
+ rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m.synd);
+ }
+
mce_log(&m);
wrmsrl(msr_status, 0);
@@ -541,15 +644,14 @@ static void amd_deferred_error_interrupt(void)
static void amd_threshold_interrupt(void)
{
u32 low = 0, high = 0, address = 0;
- int cpu = smp_processor_id();
- unsigned int bank, block;
+ unsigned int bank, block, cpu = smp_processor_id();
/* assume first bank caused it */
for (bank = 0; bank < mca_cfg.banks; ++bank) {
if (!(per_cpu(bank_map, cpu) & (1 << bank)))
continue;
for (block = 0; block < NR_BLOCKS; ++block) {
- address = get_block_address(address, low, high, bank, block);
+ address = get_block_address(cpu, address, low, high, bank, block);
if (!address)
break;
@@ -713,6 +815,34 @@ static struct kobj_type threshold_ktype = {
.default_attrs = default_attrs,
};
+static const char *get_name(unsigned int bank, struct threshold_block *b)
+{
+ unsigned int bank_type;
+
+ if (!mce_flags.smca) {
+ if (b && bank == 4)
+ return bank4_names(b);
+
+ return th_names[bank];
+ }
+
+ if (!smca_banks[bank].type)
+ return NULL;
+
+ bank_type = smca_banks[bank].type->bank_type;
+
+ if (b && bank_type == SMCA_UMC) {
+ if (b->block < ARRAY_SIZE(smca_umc_block_names))
+ return smca_umc_block_names[b->block];
+ return NULL;
+ }
+
+ snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN,
+ "%s_%x", smca_bank_names[bank_type].name,
+ smca_banks[bank].type_instance);
+ return buf_mcatype;
+}
+
static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
unsigned int block, u32 address)
{
@@ -767,11 +897,11 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
err = kobject_init_and_add(&b->kobj, &threshold_ktype,
per_cpu(threshold_banks, cpu)[bank]->kobj,
- (bank == 4 ? bank4_names(b) : th_names[bank]));
+ get_name(bank, b));
if (err)
goto out_free;
recurse:
- address = get_block_address(address, low, high, bank, ++block);
+ address = get_block_address(cpu, address, low, high, bank, ++block);
if (!address)
return 0;
@@ -822,7 +952,7 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank)
struct device *dev = per_cpu(mce_device, cpu);
struct amd_northbridge *nb = NULL;
struct threshold_bank *b = NULL;
- const char *name = th_names[bank];
+ const char *name = get_name(bank, NULL);
int err = 0;
if (is_shared_bank(bank)) {
@@ -869,7 +999,7 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank)
}
}
- err = allocate_threshold_blocks(cpu, bank, 0, MSR_IA32_MCx_MISC(bank));
+ err = allocate_threshold_blocks(cpu, bank, 0, msr_ops.misc(bank));
if (!err)
goto out;
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index cc457ff818ad..51402a7e4ca6 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -626,3 +626,34 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3,
amd_disable_seq_and_redirect_scrub);
#endif
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
+#include <linux/jump_label.h>
+#include <asm/string_64.h>
+
+/* Ivy Bridge, Haswell, Broadwell */
+static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev)
+{
+ u32 capid0;
+
+ pci_read_config_dword(pdev, 0x84, &capid0);
+
+ if (capid0 & 0x10)
+ static_branch_inc(&mcsafe_key);
+}
+
+/* Skylake */
+static void quirk_intel_purley_xeon_ras_cap(struct pci_dev *pdev)
+{
+ u32 capid0;
+
+ pci_read_config_dword(pdev, 0x84, &capid0);
+
+ if ((capid0 & 0xc0) == 0xc0)
+ static_branch_inc(&mcsafe_key);
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0ec3, quirk_intel_brickland_xeon_ras_cap);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_ras_cap);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, quirk_intel_brickland_xeon_ras_cap);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2083, quirk_intel_purley_xeon_ras_cap);
+#endif
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 95e49f6e4fc3..b2cee3d19477 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -38,7 +38,7 @@ EXPORT_SYMBOL(__copy_user_nocache);
EXPORT_SYMBOL(_copy_from_user);
EXPORT_SYMBOL(_copy_to_user);
-EXPORT_SYMBOL_GPL(memcpy_mcsafe);
+EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled);
EXPORT_SYMBOL(copy_page);
EXPORT_SYMBOL(clear_page);
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 2ec0b0abbfaa..49e6ebac7e73 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -181,11 +181,11 @@ ENDPROC(memcpy_orig)
#ifndef CONFIG_UML
/*
- * memcpy_mcsafe - memory copy with machine check exception handling
+ * memcpy_mcsafe_unrolled - memory copy with machine check exception handling
* Note that we only catch machine checks when reading the source addresses.
* Writes to target are posted and don't generate machine checks.
*/
-ENTRY(memcpy_mcsafe)
+ENTRY(memcpy_mcsafe_unrolled)
cmpl $8, %edx
/* Less than 8 bytes? Go to byte copy loop */
jb .L_no_whole_words
@@ -273,7 +273,7 @@ ENTRY(memcpy_mcsafe)
.L_done_memcpy_trap:
xorq %rax, %rax
ret
-ENDPROC(memcpy_mcsafe)
+ENDPROC(memcpy_mcsafe_unrolled)
.section .fixup, "ax"
/* Return -EFAULT for any failure */
diff --git a/arch/x86/ras/mce_amd_inj.c b/arch/x86/ras/mce_amd_inj.c
index 1104515d5ad2..1ac76479c266 100644
--- a/arch/x86/ras/mce_amd_inj.c
+++ b/arch/x86/ras/mce_amd_inj.c
@@ -68,6 +68,7 @@ static int inj_##reg##_set(void *data, u64 val) \
MCE_INJECT_SET(status);
MCE_INJECT_SET(misc);
MCE_INJECT_SET(addr);
+MCE_INJECT_SET(synd);
#define MCE_INJECT_GET(reg) \
static int inj_##reg##_get(void *data, u64 *val) \
@@ -81,10 +82,12 @@ static int inj_##reg##_get(void *data, u64 *val) \
MCE_INJECT_GET(status);
MCE_INJECT_GET(misc);
MCE_INJECT_GET(addr);
+MCE_INJECT_GET(synd);
DEFINE_SIMPLE_ATTRIBUTE(status_fops, inj_status_get, inj_status_set, "%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(misc_fops, inj_misc_get, inj_misc_set, "%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(addr_fops, inj_addr_get, inj_addr_set, "%llx\n");
+DEFINE_SIMPLE_ATTRIBUTE(synd_fops, inj_synd_get, inj_synd_set, "%llx\n");
/*
* Caller needs to be make sure this cpu doesn't disappear
@@ -243,27 +246,27 @@ static void toggle_nb_mca_mst_cpu(u16 nid)
static void prepare_msrs(void *info)
{
- struct mce i_mce = *(struct mce *)info;
- u8 b = i_mce.bank;
+ struct mce m = *(struct mce *)info;
+ u8 b = m.bank;
- wrmsrl(MSR_IA32_MCG_STATUS, i_mce.mcgstatus);
+ wrmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
if (boot_cpu_has(X86_FEATURE_SMCA)) {
- if (i_mce.inject_flags == DFR_INT_INJ) {
- wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), i_mce.status);
- wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), i_mce.addr);
+ if (m.inject_flags == DFR_INT_INJ) {
+ wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), m.status);
+ wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), m.addr);
} else {
- wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), i_mce.status);
- wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), i_mce.addr);
+ wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), m.status);
+ wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), m.addr);
}
- wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), i_mce.misc);
+ wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m.misc);
+ wrmsrl(MSR_AMD64_SMCA_MCx_SYND(b), m.synd);
} else {
- wrmsrl(MSR_IA32_MCx_STATUS(b), i_mce.status);
- wrmsrl(MSR_IA32_MCx_ADDR(b), i_mce.addr);
- wrmsrl(MSR_IA32_MCx_MISC(b), i_mce.misc);
+ wrmsrl(MSR_IA32_MCx_STATUS(b), m.status);
+ wrmsrl(MSR_IA32_MCx_ADDR(b), m.addr);
+ wrmsrl(MSR_IA32_MCx_MISC(b), m.misc);
}
-
}
static void do_inject(void)
@@ -275,6 +278,9 @@ static void do_inject(void)
if (i_mce.misc)
i_mce.status |= MCI_STATUS_MISCV;
+ if (i_mce.synd)
+ i_mce.status |= MCI_STATUS_SYNDV;
+
if (inj_type == SW_INJ) {
mce_inject_log(&i_mce);
return;
@@ -301,7 +307,9 @@ static void do_inject(void)
* only on the node base core. Refer to D18F3x44[NbMcaToMstCpuEn] for
* Fam10h and later BKDGs.
*/
- if (static_cpu_has(X86_FEATURE_AMD_DCM) && b == 4) {
+ if (static_cpu_has(X86_FEATURE_AMD_DCM) &&
+ b == 4 &&
+ boot_cpu_data.x86 < 0x17) {
toggle_nb_mca_mst_cpu(amd_get_nb_id(cpu));
cpu = get_nbc_for_node(amd_get_nb_id(cpu));
}
@@ -371,6 +379,9 @@ static const char readme_msg[] =
"\t used for error thresholding purposes and its validity is indicated by\n"
"\t MCi_STATUS[MiscV].\n"
"\n"
+"synd:\t Set MCi_SYND: provide syndrome info about the error. Only valid on\n"
+"\t Scalable MCA systems, and its validity is indicated by MCi_STATUS[SyndV].\n"
+"\n"
"addr:\t Error address value to be written to MCi_ADDR. Log address information\n"
"\t associated with the error.\n"
"\n"
@@ -420,6 +431,7 @@ static struct dfs_node {
{ .name = "status", .fops = &status_fops, .perm = S_IRUSR | S_IWUSR },
{ .name = "misc", .fops = &misc_fops, .perm = S_IRUSR | S_IWUSR },
{ .name = "addr", .fops = &addr_fops, .perm = S_IRUSR | S_IWUSR },
+ { .name = "synd", .fops = &synd_fops, .perm = S_IRUSR | S_IWUSR },
{ .name = "bank", .fops = &bank_fops, .perm = S_IRUSR | S_IWUSR },
{ .name = "flags", .fops = &flags_fops, .perm = S_IRUSR | S_IWUSR },
{ .name = "cpu", .fops = &extcpu_fops, .perm = S_IRUSR | S_IWUSR },
@@ -428,7 +440,7 @@ static struct dfs_node {
static int __init init_mce_inject(void)
{
- int i;
+ unsigned int i;
u64 cap;
rdmsrl(MSR_IA32_MCG_CAP, cap);
@@ -452,26 +464,22 @@ static int __init init_mce_inject(void)
return 0;
err_dfs_add:
- while (--i >= 0)
+ while (i-- > 0)
debugfs_remove(dfs_fls[i].d);
debugfs_remove(dfs_inj);
dfs_inj = NULL;
- return -ENOMEM;
+ return -ENODEV;
}
static void __exit exit_mce_inject(void)
{
- int i;
- for (i = 0; i < ARRAY_SIZE(dfs_fls); i++)
- debugfs_remove(dfs_fls[i].d);
+ debugfs_remove_recursive(dfs_inj);
+ dfs_inj = NULL;
memset(&dfs_fls, 0, sizeof(dfs_fls));
-
- debugfs_remove(dfs_inj);
- dfs_inj = NULL;
}
module_init(init_mce_inject);
module_exit(exit_mce_inject);
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index 9b6800a79c7f..daaac2c79ca7 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -148,12 +148,12 @@ static const char * const mc6_mce_desc[] = {
};
/* Scalable MCA error strings */
-static const char * const f17h_ls_mce_desc[] = {
+static const char * const smca_ls_mce_desc[] = {
"Load queue parity",
"Store queue parity",
"Miss address buffer payload parity",
"L1 TLB parity",
- "", /* reserved */
+ "Reserved",
"DC tag error type 6",
"DC tag error type 1",
"Internal error type 1",
@@ -172,7 +172,7 @@ static const char * const f17h_ls_mce_desc[] = {
"L2 fill data error",
};
-static const char * const f17h_if_mce_desc[] = {
+static const char * const smca_if_mce_desc[] = {
"microtag probe port parity error",
"IC microtag or full tag multi-hit error",
"IC full tag parity",
@@ -185,19 +185,22 @@ static const char * const f17h_if_mce_desc[] = {
"BPQ snoop parity on Thread 1",
"L1 BTB multi-match error",
"L2 BTB multi-match error",
+ "L2 Cache Response Poison error",
+ "System Read Data error",
};
-static const char * const f17h_l2_mce_desc[] = {
+static const char * const smca_l2_mce_desc[] = {
"L2M tag multi-way-hit error",
"L2M tag ECC error",
"L2M data ECC error",
"HW assert",
};
-static const char * const f17h_de_mce_desc[] = {
+static const char * const smca_de_mce_desc[] = {
"uop cache tag parity error",
"uop cache data parity error",
"Insn buffer parity error",
+ "uop queue parity error",
"Insn dispatch queue parity error",
"Fetch address FIFO parity",
"Patch RAM data parity",
@@ -205,7 +208,7 @@ static const char * const f17h_de_mce_desc[] = {
"uop buffer parity"
};
-static const char * const f17h_ex_mce_desc[] = {
+static const char * const smca_ex_mce_desc[] = {
"Watchdog timeout error",
"Phy register file parity",
"Flag register file parity",
@@ -214,18 +217,22 @@ static const char * const f17h_ex_mce_desc[] = {
"EX payload parity",
"Checkpoint queue parity",
"Retire dispatch queue parity",
+ "Retire status queue parity error",
+ "Scheduling queue parity error",
+ "Branch buffer queue parity error",
};
-static const char * const f17h_fp_mce_desc[] = {
+static const char * const smca_fp_mce_desc[] = {
"Physical register file parity",
"Freelist parity error",
"Schedule queue parity",
"NSQ parity error",
"Retire queue parity",
"Status register file parity",
+ "Hardware assertion",
};
-static const char * const f17h_l3_mce_desc[] = {
+static const char * const smca_l3_mce_desc[] = {
"Shadow tag macro ECC error",
"Shadow tag macro multi-way-hit error",
"L3M tag ECC error",
@@ -236,7 +243,7 @@ static const char * const f17h_l3_mce_desc[] = {
"L3 HW assert",
};
-static const char * const f17h_cs_mce_desc[] = {
+static const char * const smca_cs_mce_desc[] = {
"Illegal request from transport layer",
"Address violation",
"Security violation",
@@ -248,14 +255,14 @@ static const char * const f17h_cs_mce_desc[] = {
"ECC error on probe filter access",
};
-static const char * const f17h_pie_mce_desc[] = {
+static const char * const smca_pie_mce_desc[] = {
"HW assert",
"Internal PIE register security violation",
"Error on GMI link",
"Poison data written to internal PIE register",
};
-static const char * const f17h_umc_mce_desc[] = {
+static const char * const smca_umc_mce_desc[] = {
"DRAM ECC error",
"Data poison error on DRAM",
"SDP parity error",
@@ -264,18 +271,39 @@ static const char * const f17h_umc_mce_desc[] = {
"Write data CRC error",
};
-static const char * const f17h_pb_mce_desc[] = {
+static const char * const smca_pb_mce_desc[] = {
"Parameter Block RAM ECC error",
};
-static const char * const f17h_psp_mce_desc[] = {
+static const char * const smca_psp_mce_desc[] = {
"PSP RAM ECC or parity error",
};
-static const char * const f17h_smu_mce_desc[] = {
+static const char * const smca_smu_mce_desc[] = {
"SMU RAM ECC or parity error",
};
+struct smca_mce_desc {
+ const char * const *descs;
+ unsigned int num_descs;
+};
+
+static struct smca_mce_desc smca_mce_descs[] = {
+ [SMCA_LS] = { smca_ls_mce_desc, ARRAY_SIZE(smca_ls_mce_desc) },
+ [SMCA_IF] = { smca_if_mce_desc, ARRAY_SIZE(smca_if_mce_desc) },
+ [SMCA_L2_CACHE] = { smca_l2_mce_desc, ARRAY_SIZE(smca_l2_mce_desc) },
+ [SMCA_DE] = { smca_de_mce_desc, ARRAY_SIZE(smca_de_mce_desc) },
+ [SMCA_EX] = { smca_ex_mce_desc, ARRAY_SIZE(smca_ex_mce_desc) },
+ [SMCA_FP] = { smca_fp_mce_desc, ARRAY_SIZE(smca_fp_mce_desc) },
+ [SMCA_L3_CACHE] = { smca_l3_mce_desc, ARRAY_SIZE(smca_l3_mce_desc) },
+ [SMCA_CS] = { smca_cs_mce_desc, ARRAY_SIZE(smca_cs_mce_desc) },
+ [SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) },
+ [SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) },
+ [SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) },
+ [SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) },
+ [SMCA_SMU] = { smca_smu_mce_desc, ARRAY_SIZE(smca_smu_mce_desc) },
+};
+
static bool f12h_mc0_mce(u16 ec, u8 xec)
{
bool ret = false;
@@ -820,175 +848,35 @@ static void decode_mc6_mce(struct mce *m)
pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n");
}
-static void decode_f17h_core_errors(const char *ip_name, u8 xec,
- unsigned int mca_type)
-{
- const char * const *error_desc_array;
- size_t len;
-
- pr_emerg(HW_ERR "%s Error: ", ip_name);
-
- switch (mca_type) {
- case SMCA_LS:
- error_desc_array = f17h_ls_mce_desc;
- len = ARRAY_SIZE(f17h_ls_mce_desc) - 1;
-
- if (xec == 0x4) {
- pr_cont("Unrecognized LS MCA error code.\n");
- return;
- }
- break;
-
- case SMCA_IF:
- error_desc_array = f17h_if_mce_desc;
- len = ARRAY_SIZE(f17h_if_mce_desc) - 1;
- break;
-
- case SMCA_L2_CACHE:
- error_desc_array = f17h_l2_mce_desc;
- len = ARRAY_SIZE(f17h_l2_mce_desc) - 1;
- break;
-
- case SMCA_DE:
- error_desc_array = f17h_de_mce_desc;
- len = ARRAY_SIZE(f17h_de_mce_desc) - 1;
- break;
-
- case SMCA_EX:
- error_desc_array = f17h_ex_mce_desc;
- len = ARRAY_SIZE(f17h_ex_mce_desc) - 1;
- break;
-
- case SMCA_FP:
- error_desc_array = f17h_fp_mce_desc;
- len = ARRAY_SIZE(f17h_fp_mce_desc) - 1;
- break;
-
- case SMCA_L3_CACHE:
- error_desc_array = f17h_l3_mce_desc;
- len = ARRAY_SIZE(f17h_l3_mce_desc) - 1;
- break;
-
- default:
- pr_cont("Corrupted MCA core error info.\n");
- return;
- }
-
- if (xec > len) {
- pr_cont("Unrecognized %s MCA bank error code.\n",
- amd_core_mcablock_names[mca_type]);
- return;
- }
-
- pr_cont("%s.\n", error_desc_array[xec]);
-}
-
-static void decode_df_errors(u8 xec, unsigned int mca_type)
-{
- const char * const *error_desc_array;
- size_t len;
-
- pr_emerg(HW_ERR "Data Fabric Error: ");
-
- switch (mca_type) {
- case SMCA_CS:
- error_desc_array = f17h_cs_mce_desc;
- len = ARRAY_SIZE(f17h_cs_mce_desc) - 1;
- break;
-
- case SMCA_PIE:
- error_desc_array = f17h_pie_mce_desc;
- len = ARRAY_SIZE(f17h_pie_mce_desc) - 1;
- break;
-
- default:
- pr_cont("Corrupted MCA Data Fabric info.\n");
- return;
- }
-
- if (xec > len) {
- pr_cont("Unrecognized %s MCA bank error code.\n",
- amd_df_mcablock_names[mca_type]);
- return;
- }
-
- pr_cont("%s.\n", error_desc_array[xec]);
-}
-
/* Decode errors according to Scalable MCA specification */
static void decode_smca_errors(struct mce *m)
{
- u32 addr = MSR_AMD64_SMCA_MCx_IPID(m->bank);
- unsigned int hwid, mca_type, i;
- u8 xec = XEC(m->status, xec_mask);
- const char * const *error_desc_array;
+ struct smca_hwid_mcatype *type;
+ unsigned int bank_type;
const char *ip_name;
- u32 low, high;
- size_t len;
+ u8 xec = XEC(m->status, xec_mask);
- if (rdmsr_safe(addr, &low, &high)) {
- pr_emerg("Invalid IP block specified, error information is unreliable.\n");
+ if (m->bank >= ARRAY_SIZE(smca_banks))
return;
- }
-
- hwid = high & MCI_IPID_HWID;
- mca_type = (high & MCI_IPID_MCATYPE) >> 16;
-
- pr_emerg(HW_ERR "MC%d IPID value: 0x%08x%08x\n", m->bank, high, low);
-
- /*
- * Based on hwid and mca_type values, decode errors from respective IPs.
- * Note: mca_type values make sense only in the context of an hwid.
- */
- for (i = 0; i < ARRAY_SIZE(amd_hwids); i++)
- if (amd_hwids[i].hwid == hwid)
- break;
-
- switch (i) {
- case SMCA_F17H_CORE:
- ip_name = (mca_type == SMCA_L3_CACHE) ?
- "L3 Cache" : "F17h Core";
- return decode_f17h_core_errors(ip_name, xec, mca_type);
- break;
- case SMCA_DF:
- return decode_df_errors(xec, mca_type);
- break;
-
- case SMCA_UMC:
- error_desc_array = f17h_umc_mce_desc;
- len = ARRAY_SIZE(f17h_umc_mce_desc) - 1;
- break;
-
- case SMCA_PB:
- error_desc_array = f17h_pb_mce_desc;
- len = ARRAY_SIZE(f17h_pb_mce_desc) - 1;
- break;
+ if (boot_cpu_data.x86 >= 0x17 && m->bank == 4)
+ pr_emerg(HW_ERR "Bank 4 is reserved on Fam17h.\n");
- case SMCA_PSP:
- error_desc_array = f17h_psp_mce_desc;
- len = ARRAY_SIZE(f17h_psp_mce_desc) - 1;
- break;
-
- case SMCA_SMU:
- error_desc_array = f17h_smu_mce_desc;
- len = ARRAY_SIZE(f17h_smu_mce_desc) - 1;
- break;
-
- default:
- pr_emerg(HW_ERR "HWID:%d does not match any existing IPs.\n", hwid);
+ type = smca_banks[m->bank].type;
+ if (!type)
return;
- }
- ip_name = amd_hwids[i].name;
- pr_emerg(HW_ERR "%s Error: ", ip_name);
+ bank_type = type->bank_type;
+ ip_name = smca_bank_names[bank_type].long_name;
- if (xec > len) {
- pr_cont("Unrecognized %s MCA bank error code.\n", ip_name);
- return;
- }
+ pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec);
- pr_cont("%s.\n", error_desc_array[xec]);
+ /* Only print the decode of valid error codes */
+ if (xec < smca_mce_descs[bank_type].num_descs &&
+ (type->xec_bitmap & BIT_ULL(xec))) {
+ pr_emerg(HW_ERR "%s Error: ", ip_name);
+ pr_cont("%s.\n", smca_mce_descs[bank_type].descs[xec]);
+ }
}
static inline void amd_decode_err_code(u16 ec)
@@ -1078,6 +966,8 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
u32 low, high;
u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
+ pr_cont("|%s", ((m->status & MCI_STATUS_SYNDV) ? "SyndV" : "-"));
+
if (!rdmsr_safe(addr, &low, &high) &&
(low & MCI_CONFIG_MCAX))
pr_cont("|%s", ((m->status & MCI_STATUS_TCC) ? "TCC" : "-"));
@@ -1091,12 +981,20 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
pr_cont("]: 0x%016llx\n", m->status);
if (m->status & MCI_STATUS_ADDRV)
- pr_emerg(HW_ERR "MC%d Error Address: 0x%016llx\n", m->bank, m->addr);
+ pr_emerg(HW_ERR "Error Addr: 0x%016llx", m->addr);
if (boot_cpu_has(X86_FEATURE_SMCA)) {
+ if (m->status & MCI_STATUS_SYNDV)
+ pr_cont(", Syndrome: 0x%016llx", m->synd);
+
+ pr_cont(", IPID: 0x%016llx", m->ipid);
+
+ pr_cont("\n");
+
decode_smca_errors(m);
goto err_code;
- }
+ } else
+ pr_cont("\n");
if (!fam_ops)
goto err_code;
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 661af564fae8..595fb46213fc 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -267,9 +267,15 @@ struct static_key_false {
#define DEFINE_STATIC_KEY_TRUE(name) \
struct static_key_true name = STATIC_KEY_TRUE_INIT
+#define DECLARE_STATIC_KEY_TRUE(name) \
+ extern struct static_key_true name
+
#define DEFINE_STATIC_KEY_FALSE(name) \
struct static_key_false name = STATIC_KEY_FALSE_INIT
+#define DECLARE_STATIC_KEY_FALSE(name) \
+ extern struct static_key_false name
+
extern bool ____wrong_branch_error(void);
#define static_key_enabled(x) \
diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h
index 4cbbcef6baa8..70f02149808c 100644
--- a/include/trace/events/mce.h
+++ b/include/trace/events/mce.h
@@ -20,6 +20,8 @@ TRACE_EVENT(mce_record,
__field( u64, status )
__field( u64, addr )
__field( u64, misc )
+ __field( u64, synd )
+ __field( u64, ipid )
__field( u64, ip )
__field( u64, tsc )
__field( u64, walltime )
@@ -38,6 +40,8 @@ TRACE_EVENT(mce_record,
__entry->status = m->status;
__entry->addr = m->addr;
__entry->misc = m->misc;
+ __entry->synd = m->synd;
+ __entry->ipid = m->ipid;
__entry->ip = m->ip;
__entry->tsc = m->tsc;
__entry->walltime = m->time;
@@ -50,11 +54,12 @@ TRACE_EVENT(mce_record,
__entry->cpuvendor = m->cpuvendor;
),
- TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, ADDR/MISC: %016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x",
+ TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, IPID: %016Lx, ADDR/MISC/SYND: %016Lx/%016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x",
__entry->cpu,
__entry->mcgcap, __entry->mcgstatus,
__entry->bank, __entry->status,
- __entry->addr, __entry->misc,
+ __entry->ipid,
+ __entry->addr, __entry->misc, __entry->synd,
__entry->cs, __entry->ip,
__entry->tsc,
__entry->cpuvendor, __entry->cpuid,