[tip:x86/core] x86/mce/amd: Introduce deferred error interrupt handler

From: tip-bot for Aravind Gopalakrishnan
Date: Sun Jun 07 2015 - 13:38:21 EST


Commit-ID: 24fd78a81f6d3fe7f7a440c8629f9c52cd5f830e
Gitweb: http://git.kernel.org/tip/24fd78a81f6d3fe7f7a440c8629f9c52cd5f830e
Author: Aravind Gopalakrishnan <Aravind.Gopalakrishnan@xxxxxxx>
AuthorDate: Wed, 6 May 2015 06:58:56 -0500
Committer: Borislav Petkov <bp@xxxxxxx>
CommitDate: Thu, 7 May 2015 10:23:32 +0200

x86/mce/amd: Introduce deferred error interrupt handler

Deferred errors indicate error conditions that were not corrected, but
require no action from S/W (or action is optional).These errors provide
info about a latent UC MCE that can occur when a poisoned data is
consumed by the processor.

Processors that report these errors can be configured to generate APIC
interrupts to notify OS about the error.

Provide an interrupt handler in this patch so that OS can catch these
errors as and when they happen. Currently, we simply log the errors and
exit the handler as S/W action is not mandated.

Signed-off-by: Aravind Gopalakrishnan <Aravind.Gopalakrishnan@xxxxxxx>
Cc: Tony Luck <tony.luck@xxxxxxxxx>
Cc: x86-ml <x86@xxxxxxxxxx>
Cc: linux-edac <linux-edac@xxxxxxxxxxxxxxx>
Link: http://lkml.kernel.org/r/1430913538-1415-5-git-send-email-Aravind.Gopalakrishnan@xxxxxxx
Signed-off-by: Borislav Petkov <bp@xxxxxxx>
---
arch/x86/include/asm/entry_arch.h | 3 ++
arch/x86/include/asm/hardirq.h | 3 ++
arch/x86/include/asm/hw_irq.h | 2 +
arch/x86/include/asm/irq_vectors.h | 1 +
arch/x86/include/asm/mce.h | 3 ++
arch/x86/include/asm/trace/irq_vectors.h | 6 +++
arch/x86/include/asm/traps.h | 3 +-
arch/x86/kernel/cpu/mcheck/mce_amd.c | 93 ++++++++++++++++++++++++++++++++
arch/x86/kernel/entry_64.S | 5 ++
arch/x86/kernel/irq.c | 6 +++
arch/x86/kernel/irqinit.c | 4 ++
arch/x86/kernel/traps.c | 5 ++
12 files changed, 133 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index dc5fa66..6da46db 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -50,4 +50,7 @@ BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR)
#endif

+#ifdef CONFIG_X86_MCE_AMD
+BUILD_INTERRUPT(deferred_error_interrupt, DEFERRED_ERROR_VECTOR)
+#endif
#endif
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 0f5fb6b..db9f536 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -33,6 +33,9 @@ typedef struct {
#ifdef CONFIG_X86_MCE_THRESHOLD
unsigned int irq_threshold_count;
#endif
+#ifdef CONFIG_X86_MCE_AMD
+ unsigned int irq_deferred_error_count;
+#endif
#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
unsigned int irq_hv_callback_count;
#endif
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index e9571dd..f71e489 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -73,6 +73,7 @@ extern asmlinkage void invalidate_interrupt31(void);
extern asmlinkage void irq_move_cleanup_interrupt(void);
extern asmlinkage void reboot_interrupt(void);
extern asmlinkage void threshold_interrupt(void);
+extern asmlinkage void deferred_error_interrupt(void);

extern asmlinkage void call_function_interrupt(void);
extern asmlinkage void call_function_single_interrupt(void);
@@ -87,6 +88,7 @@ extern void trace_spurious_interrupt(void);
extern void trace_thermal_interrupt(void);
extern void trace_reschedule_interrupt(void);
extern void trace_threshold_interrupt(void);
+extern void trace_deferred_error_interrupt(void);
extern void trace_call_function_interrupt(void);
extern void trace_call_function_single_interrupt(void);
#define trace_irq_move_cleanup_interrupt irq_move_cleanup_interrupt
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 666c89e..026fc1e 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -113,6 +113,7 @@
#define IRQ_WORK_VECTOR 0xf6

#define UV_BAU_MESSAGE 0xf5
+#define DEFERRED_ERROR_VECTOR 0xf4

/* Vector on which hypervisor callbacks will be delivered */
#define HYPERVISOR_CALLBACK_VECTOR 0xf3
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 407ced6..6a3034a 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -234,6 +234,9 @@ void do_machine_check(struct pt_regs *, long);
extern void (*mce_threshold_vector)(void);
extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);

+/* Deferred error interrupt handler */
+extern void (*deferred_error_int_vector)(void);
+
/*
* Thermal handler
*/
diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h
index 4cab890..38a09a1 100644
--- a/arch/x86/include/asm/trace/irq_vectors.h
+++ b/arch/x86/include/asm/trace/irq_vectors.h
@@ -101,6 +101,12 @@ DEFINE_IRQ_VECTOR_EVENT(call_function_single);
DEFINE_IRQ_VECTOR_EVENT(threshold_apic);

/*
+ * deferred_error_apic - called when entering/exiting a deferred apic interrupt
+ * vector handler
+ */
+DEFINE_IRQ_VECTOR_EVENT(deferred_error_apic);
+
+/*
* thermal_apic - called when entering/exiting a thermal apic interrupt
* vector handler
*/
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 4e49d7d..c5380be 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -108,7 +108,8 @@ extern int panic_on_unrecovered_nmi;
void math_emulate(struct math_emu_info *);
#ifndef CONFIG_X86_32
asmlinkage void smp_thermal_interrupt(void);
-asmlinkage void mce_threshold_interrupt(void);
+asmlinkage void smp_threshold_interrupt(void);
+asmlinkage void smp_deferred_error_interrupt(void);
#endif

extern enum ctx_state ist_enter(struct pt_regs *regs);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 6070757..2e7ebe7 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -12,6 +12,8 @@
* - added support for AMD Family 0x10 processors
* May 2012
* - major scrubbing
+ * May 2015
+ * - add support for deferred error interrupts (Aravind Gopalakrishnan)
*
* All MC4_MISCi registers are shared between multi-cores
*/
@@ -32,6 +34,7 @@
#include <asm/idle.h>
#include <asm/mce.h>
#include <asm/msr.h>
+#include <asm/trace/irq_vectors.h>

#define NR_BLOCKS 9
#define THRESHOLD_MAX 0xFFF
@@ -47,6 +50,13 @@
#define MASK_BLKPTR_LO 0xFF000000
#define MCG_XBLK_ADDR 0xC0000400

+/* Deferred error settings */
+#define MSR_CU_DEF_ERR 0xC0000410
+#define MASK_DEF_LVTOFF 0x000000F0
+#define MASK_DEF_INT_TYPE 0x00000006
+#define DEF_LVT_OFF 0x2
+#define DEF_INT_TYPE_APIC 0x2
+
static const char * const th_names[] = {
"load_store",
"insn_fetch",
@@ -60,6 +70,13 @@ static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */

static void amd_threshold_interrupt(void);
+static void amd_deferred_error_interrupt(void);
+
+static void default_deferred_error_interrupt(void)
+{
+ pr_err("Unexpected deferred interrupt at vector %x\n", DEFERRED_ERROR_VECTOR);
+}
+void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;

/*
* CPU Initialization
@@ -205,6 +222,39 @@ static int setup_APIC_mce(int reserved, int new)
return reserved;
}

+static int setup_APIC_deferred_error(int reserved, int new)
+{
+ if (reserved < 0 && !setup_APIC_eilvt(new, DEFERRED_ERROR_VECTOR,
+ APIC_EILVT_MSG_FIX, 0))
+ return new;
+
+ return reserved;
+}
+
+static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
+{
+ u32 low = 0, high = 0;
+ int def_offset = -1, def_new;
+
+ if (rdmsr_safe(MSR_CU_DEF_ERR, &low, &high))
+ return;
+
+ def_new = (low & MASK_DEF_LVTOFF) >> 4;
+ if (!(low & MASK_DEF_LVTOFF)) {
+ pr_err(FW_BUG "Your BIOS is not setting up LVT offset 0x2 for deferred error IRQs correctly.\n");
+ def_new = DEF_LVT_OFF;
+ low = (low & ~MASK_DEF_LVTOFF) | (DEF_LVT_OFF << 4);
+ }
+
+ def_offset = setup_APIC_deferred_error(def_offset, def_new);
+ if ((def_offset == def_new) &&
+ (deferred_error_int_vector != amd_deferred_error_interrupt))
+ deferred_error_int_vector = amd_deferred_error_interrupt;
+
+ low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC;
+ wrmsr(MSR_CU_DEF_ERR, low, high);
+}
+
/* cpu init entry point, called from mce.c with preempt off */
void mce_amd_feature_init(struct cpuinfo_x86 *c)
{
@@ -262,6 +312,9 @@ init:
mce_threshold_block_init(&b, offset);
}
}
+
+ if (mce_flags.succor)
+ deferred_error_interrupt_enable(c);
}

static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
@@ -288,6 +341,46 @@ static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
}

+static inline void __smp_deferred_error_interrupt(void)
+{
+ inc_irq_stat(irq_deferred_error_count);
+ deferred_error_int_vector();
+}
+
+asmlinkage __visible void smp_deferred_error_interrupt(void)
+{
+ entering_irq();
+ __smp_deferred_error_interrupt();
+ exiting_ack_irq();
+}
+
+asmlinkage __visible void smp_trace_deferred_error_interrupt(void)
+{
+ entering_irq();
+ trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
+ __smp_deferred_error_interrupt();
+ trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR);
+ exiting_ack_irq();
+}
+
+/* APIC interrupt handler for deferred errors */
+static void amd_deferred_error_interrupt(void)
+{
+ u64 status;
+ unsigned int bank;
+
+ for (bank = 0; bank < mca_cfg.banks; ++bank) {
+ rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
+
+ if (!(status & MCI_STATUS_VAL) ||
+ !(status & MCI_STATUS_DEFERRED))
+ continue;
+
+ __log_error(bank, false, 0);
+ break;
+ }
+}
+
/*
* APIC Interrupt Handler
*/
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 02c2eff..12aea85 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -935,6 +935,11 @@ apicinterrupt THRESHOLD_APIC_VECTOR \
threshold_interrupt smp_threshold_interrupt
#endif

+#ifdef CONFIG_X86_MCE_AMD
+apicinterrupt DEFERRED_ERROR_VECTOR \
+ deferred_error_interrupt smp_deferred_error_interrupt
+#endif
+
#ifdef CONFIG_X86_THERMAL_VECTOR
apicinterrupt THERMAL_APIC_VECTOR \
thermal_interrupt smp_thermal_interrupt
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index e5952c2..590ed6c 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -116,6 +116,12 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
seq_puts(p, " Threshold APIC interrupts\n");
#endif
+#ifdef CONFIG_X86_MCE_AMD
+ seq_printf(p, "%*s: ", prec, "DFR");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", irq_stats(j)->irq_deferred_error_count);
+ seq_puts(p, " Deferred Error APIC interrupts\n");
+#endif
#ifdef CONFIG_X86_MCE
seq_printf(p, "%*s: ", prec, "MCE");
for_each_online_cpu(j)
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index cd10a64..d7ec6e7 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -135,6 +135,10 @@ static void __init apic_intr_init(void)
alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
#endif

+#ifdef CONFIG_X86_MCE_AMD
+ alloc_intr_gate(DEFERRED_ERROR_VECTOR, deferred_error_interrupt);
+#endif
+
#ifdef CONFIG_X86_LOCAL_APIC
/* self generated IPI for local APIC timer */
alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 324ab52..68b1d59 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -827,6 +827,11 @@ asmlinkage __visible void __attribute__((weak)) smp_threshold_interrupt(void)
{
}

+asmlinkage __visible void __attribute__((weak))
+smp_deferred_error_interrupt(void)
+{
+}
+
/*
* 'math_state_restore()' saves the current math information in the
* old math state array, and gets the new ones from the current task
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/