[PATCH 5/8] x86/platform/UV: Add basic CPU NMI health check
From: Mike Travis
Date: Fri Jan 13 2017 - 10:21:30 EST
Add a low-impact health check, triggered by the system NMI command,
that essentially checks which CPUs are responding to external NMIs.
Signed-off-by: Mike Travis <travis@xxxxxxx>
Acked-by: Dimitri Sivanich <sivanich@xxxxxxx>
Reviewed-by: Russ Anderson <rja@xxxxxxx>
Reviewed-by: Alex Thorlton <athorlton@xxxxxxx>
---
arch/x86/platform/uv/uv_nmi.c | 23 ++++++++++++++++++++++-
1 file changed, 22 insertions(+), 1 deletion(-)
--- linux.orig/arch/x86/platform/uv/uv_nmi.c
+++ linux/arch/x86/platform/uv/uv_nmi.c
@@ -177,6 +177,7 @@ module_param_named(debug, uv_nmi_debug,
* "kdump" - do crash dump
* "kdb" - enter KDB (default)
* "kgdb" - enter KGDB
+ * "health" - check if CPUs respond to NMI
*/
static char uv_nmi_action[8] = "kdb";
module_param_string(action, uv_nmi_action, sizeof(uv_nmi_action), 0644);
@@ -573,6 +574,22 @@ static void uv_nmi_sync_exit(int master)
}
}
+/* "health" action: master logs how many online CPUs are not counted in NMI */
+static void uv_nmi_action_health(int cpu, struct pt_regs *regs, int master)
+{ /* cpu and regs are not referenced in this function body */
+ if (master) {
+ int in = atomic_read(&uv_nmi_cpus_in_nmi); /* value of the in-NMI counter */
+ int out = num_online_cpus() - in; /* online CPUs not in that count */
+
+ pr_alert("UV: NMI CPU health check (non-responding:%d)\n", out);
+ atomic_set(&uv_nmi_slave_continue, SLAVE_EXIT); /* unblock slaves; assumes SLAVE_EXIT != 0 */
+ } else { /* non-master: spin until uv_nmi_slave_continue reads non-zero */
+ while (!atomic_read(&uv_nmi_slave_continue))
+ cpu_relax();
+ }
+ uv_nmi_sync_exit(master); /* reached by master and non-master alike */
+}
+
/* Walk through cpu list and dump state of each */
static void uv_nmi_dump_state(int cpu, struct pt_regs *regs, int master)
{
@@ -748,8 +765,12 @@ int uv_handle_nmi(unsigned int reason, s
/* Pause as all cpus enter the NMI handler */
uv_nmi_wait(master);
+ /* Health check for any processors that are 'stuck' */
+ if (uv_nmi_action_is("health"))
+ uv_nmi_action_health(cpu, regs, master);
+
/* Dump state of each cpu */
- if (uv_nmi_action_is("ips") || uv_nmi_action_is("dump"))
+ else if (uv_nmi_action_is("ips") || uv_nmi_action_is("dump"))
uv_nmi_dump_state(cpu, regs, master);
/* Call KGDB/KDB if enabled */
--