[GIT PULL] RAS changes for v4.8
From: Ingo Molnar
Date: Mon Jul 25 2016 - 04:34:07 EST
Linus,
Please pull the latest ras-core-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git ras-core-for-linus
# HEAD: 38c54ccb2ded3e93d8a353baeb7b9e12e1b77e23 x86/mce: Fix mce_rdmsrl() warning message
The biggest change in this cycle was an enhancement by Yazen Ghannam to reduce the
number of MCE error injection related IPIs. The rest are smaller fixes.
Thanks,
Ingo
------------------>
Aravind Gopalakrishnan (1):
x86/mce/AMD: Increase size of the bank_map type
Borislav Petkov (1):
x86/mce: Fix mce_rdmsrl() warning message
Tony Luck (1):
x86/mce: Do not use bank 1 for APEI generated error logs
Yazen Ghannam (1):
x86/RAS/AMD: Reduce the number of IPIs when prepping error injection
arch/x86/kernel/cpu/mcheck/mce-apei.c | 2 +-
arch/x86/kernel/cpu/mcheck/mce.c | 2 +-
arch/x86/kernel/cpu/mcheck/mce_amd.c | 2 +-
arch/x86/ras/mce_amd_inj.c | 58 +++++++++++++++++------------------
4 files changed, 31 insertions(+), 33 deletions(-)
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c
index 34c89a3e8260..83f1a98d37db 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-apei.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c
@@ -46,7 +46,7 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
return;
mce_setup(&m);
- m.bank = 1;
+ m.bank = -1;
/* Fake a memory read error with unknown channel */
m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 92e5e37d97bf..58af6300992d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -425,7 +425,7 @@ static u64 mce_rdmsrl(u32 msr)
}
if (rdmsrl_safe(msr, &v)) {
- WARN_ONCE(1, "mce: Unable to read msr %d!\n", msr);
+ WARN_ONCE(1, "mce: Unable to read MSR 0x%x!\n", msr);
/*
* Return zero in case the access faulted. This should
* not happen normally but can happen if the CPU does
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 10b0661651e0..7b7f3be783d4 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -93,7 +93,7 @@ const char * const amd_df_mcablock_names[] = {
EXPORT_SYMBOL_GPL(amd_df_mcablock_names);
static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
-static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */
+static DEFINE_PER_CPU(unsigned int, bank_map); /* see which banks are on */
static void amd_threshold_interrupt(void);
static void amd_deferred_error_interrupt(void);
diff --git a/arch/x86/ras/mce_amd_inj.c b/arch/x86/ras/mce_amd_inj.c
index e69f4701a076..1104515d5ad2 100644
--- a/arch/x86/ras/mce_amd_inj.c
+++ b/arch/x86/ras/mce_amd_inj.c
@@ -241,6 +241,31 @@ static void toggle_nb_mca_mst_cpu(u16 nid)
__func__, PCI_FUNC(F3->devfn), NBCFG);
}
+static void prepare_msrs(void *info)
+{
+ struct mce i_mce = *(struct mce *)info;
+ u8 b = i_mce.bank;
+
+ wrmsrl(MSR_IA32_MCG_STATUS, i_mce.mcgstatus);
+
+ if (boot_cpu_has(X86_FEATURE_SMCA)) {
+ if (i_mce.inject_flags == DFR_INT_INJ) {
+ wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), i_mce.status);
+ wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), i_mce.addr);
+ } else {
+ wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), i_mce.status);
+ wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), i_mce.addr);
+ }
+
+ wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), i_mce.misc);
+ } else {
+ wrmsrl(MSR_IA32_MCx_STATUS(b), i_mce.status);
+ wrmsrl(MSR_IA32_MCx_ADDR(b), i_mce.addr);
+ wrmsrl(MSR_IA32_MCx_MISC(b), i_mce.misc);
+ }
+
+}
+
static void do_inject(void)
{
u64 mcg_status = 0;
@@ -287,36 +312,9 @@ static void do_inject(void)
toggle_hw_mce_inject(cpu, true);
- wrmsr_on_cpu(cpu, MSR_IA32_MCG_STATUS,
- (u32)mcg_status, (u32)(mcg_status >> 32));
-
- if (boot_cpu_has(X86_FEATURE_SMCA)) {
- if (inj_type == DFR_INT_INJ) {
- wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_DESTAT(b),
- (u32)i_mce.status, (u32)(i_mce.status >> 32));
-
- wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_DEADDR(b),
- (u32)i_mce.addr, (u32)(i_mce.addr >> 32));
- } else {
- wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_STATUS(b),
- (u32)i_mce.status, (u32)(i_mce.status >> 32));
-
- wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_ADDR(b),
- (u32)i_mce.addr, (u32)(i_mce.addr >> 32));
- }
-
- wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(b),
- (u32)i_mce.misc, (u32)(i_mce.misc >> 32));
- } else {
- wrmsr_on_cpu(cpu, MSR_IA32_MCx_STATUS(b),
- (u32)i_mce.status, (u32)(i_mce.status >> 32));
-
- wrmsr_on_cpu(cpu, MSR_IA32_MCx_ADDR(b),
- (u32)i_mce.addr, (u32)(i_mce.addr >> 32));
-
- wrmsr_on_cpu(cpu, MSR_IA32_MCx_MISC(b),
- (u32)i_mce.misc, (u32)(i_mce.misc >> 32));
- }
+ i_mce.mcgstatus = mcg_status;
+ i_mce.inject_flags = inj_type;
+ smp_call_function_single(cpu, prepare_msrs, &i_mce, 0);
toggle_hw_mce_inject(cpu, false);