[PATCH] [6/10] x86: MCE: Disable machine checks on offlined CPUs.

From: Andi Kleen
Date: Thu Feb 12 2009 - 07:44:34 EST



Impact: Lower priority bug fix

Offlined CPUs could still get machine checks, but the machine check handler
cannot handle them properly, leading to an unconditional crash. Disable
machine checks on CPUs that are going down, and re-enable them if the
offline attempt fails.

Signed-off-by: Andi Kleen <ak@xxxxxxxxxxxxxxx>

---
arch/x86/kernel/cpu/mcheck/mce_64.c | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)

Index: linux/arch/x86/kernel/cpu/mcheck/mce_64.c
===================================================================
--- linux.orig/arch/x86/kernel/cpu/mcheck/mce_64.c 2009-02-12 12:10:54.000000000 +0100
+++ linux/arch/x86/kernel/cpu/mcheck/mce_64.c 2009-02-12 12:12:26.000000000 +0100
@@ -881,6 +881,27 @@
cpu_clear(cpu, mce_device_initialized);
}

+/* Make sure there are no machine checks on offlined CPUs: write 0 to every bank's control MSR. */
+static void __cpuexit mce_disable_cpu(void *h) /* h is unused; the caller in this patch passes NULL */
+{
+ int i;
+
+ if (!mce_available(&current_cpu_data)) /* bail out when mce_available() is false for the current CPU */
+ return;
+ for (i = 0; i < banks; i++)
+ wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); /* MSR index advances by 4 per bank, starting at MSR_IA32_MC0_CTL */
+}
+
+static void __cpuexit mce_reenable_cpu(void *h) /* counterpart of mce_disable_cpu(); h is unused */
+{
+ int i;
+
+ if (!mce_available(&current_cpu_data)) /* bail out when mce_available() is false for the current CPU */
+ return;
+ for (i = 0; i < banks; i++)
+ wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]); /* write bank[i] back; presumably the configured per-bank control value -- confirm against bank[]'s definition */
+}
+
/* Get notified when a cpu comes on/off. Be hotplug friendly. */
static int __cpuinit mce_cpu_callback(struct notifier_block *nfb,
unsigned long action, void *hcpu)
@@ -904,11 +925,13 @@
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
del_timer_sync(t);
+ smp_call_function_single(cpu, mce_disable_cpu, NULL, 1);
break;
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
t->expires = round_jiffies_relative(jiffies + next_interval);
add_timer_on(t, cpu);
+ smp_call_function_single(cpu, mce_reenable_cpu, NULL, 1);
break;
}
return NOTIFY_OK;
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/