[PATCH 2/2] UV: NMI: simple dump failover if kdump fails

From: George Beshers
Date: Thu Apr 30 2015 - 09:36:22 EST


UV: NMI: simple dump failover if kdump fails

The ability to trigger a kdump using the system NMI command
was added by

commit 12ba6c990fab50fe568f3ad8715e81e356552428
Author: Mike Travis <travis@xxxxxxx>
Date: Mon Sep 23 16:25:03 2013 -0500

When kdump works it is preferable to the set of backtraces
that dump provides; however, a number of things can go wrong, and
the backtraces are much more useful than nothing.

The two most common reasons for kdump not to be available are
a problem during boot or the kdump daemon failing to start.
In either case the call to crash_kexec() returns unexpectedly;
when this happens uv_nmi_kdump() also returns with the
uv_nmi_kexec_failed flag set. This condition now causes a
standard dump.

One other minor change is that dump now generates both the
show_regs() stack trace and the uv_nmi_dump_cpu_ip{,_hdr} information
that is generated by the "ips" action; the additional information
has proved to be useful.

Signed-off-by: George Beshers <gbeshers@xxxxxxx>
Acked-by: Mike Travis <travis@xxxxxxx>
Cc: Russ Anderson <rja@xxxxxxx>
Cc: Dimitri Sivanich <sivanich@xxxxxxx>
Cc: Hedi Berriche <hedi@xxxxxxx>
Cc: Alex Thorlton <athorlton@xxxxxxx>
Cc: Christoph Lameter <cl@xxxxxxxxx>

diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
index 7488caf..89f37c7 100644
--- a/arch/x86/platform/uv/uv_nmi.c
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -391,23 +391,27 @@ static void uv_nmi_dump_cpu_ip(int cpu, struct pt_regs *regs)
printk_address(regs->ip);
}

-/* Dump this cpu's state */
+/*
+ * Dump this cpu's state. Note that "kdump" only happens
+ * when crash_kexec() has failed and we are providing the user
+ * a standard dump instead.
+ */
static void uv_nmi_dump_state_cpu(int cpu, struct pt_regs *regs)
{
const char *dots = " ................................. ";

- if (uv_nmi_action_is("ips")) {
- if (cpu == 0)
- uv_nmi_dump_cpu_ip_hdr();
-
- if (current->pid != 0)
- uv_nmi_dump_cpu_ip(cpu, regs);
-
- } else if (uv_nmi_action_is("dump")) {
+ if (uv_nmi_action_is("dump") || uv_nmi_action_is("kdump")) {
printk(KERN_DEFAULT
"UV:%sNMI process trace for CPU %d\n", dots, cpu);
show_regs(regs);
}
+
+ if (cpu == 0)
+ uv_nmi_dump_cpu_ip_hdr();
+
+ if (current->pid != 0)
+ uv_nmi_dump_cpu_ip(cpu, regs);
+
this_cpu_write(uv_cpu_nmi.state, UV_NMI_STATE_DUMP_DONE);
}

@@ -492,8 +496,9 @@ static void uv_nmi_touch_watchdogs(void)
touch_nmi_watchdog();
}

-#if defined(CONFIG_KEXEC)
static atomic_t uv_nmi_kexec_failed;
+
+#if defined(CONFIG_KEXEC)
static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
{
/* Call crash to dump system state */
@@ -502,9 +507,9 @@ static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
crash_kexec(regs);

pr_emerg("UV: crash_kexec unexpectedly returned, ");
+ atomic_set(&uv_nmi_kexec_failed, 1);
if (!kexec_crash_image) {
pr_cont("crash kernel not loaded\n");
- atomic_set(&uv_nmi_kexec_failed, 1);
uv_nmi_sync_exit(1);
return;
}
@@ -524,6 +529,7 @@ static inline void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
{
if (master)
pr_err("UV: NMI kdump: KEXEC not supported in this kernel\n");
+ atomic_set(&uv_nmi_kexec_failed, 1);
}
#endif /* !CONFIG_KEXEC */

@@ -620,7 +626,8 @@ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
uv_nmi_wait(master);

/* Dump state of each cpu */
- if (uv_nmi_action_is("ips") || uv_nmi_action_is("dump"))
+ if (uv_nmi_action_is("ips") || uv_nmi_action_is("dump") ||
+ atomic_read(&uv_nmi_kexec_failed) == 1)
uv_nmi_dump_state(cpu, regs, master);

/* Call KGDB/KDB if enabled */
@@ -640,6 +647,7 @@ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
atomic_set(&uv_nmi_cpus_in_nmi, -1);
atomic_set(&uv_nmi_cpu, -1);
atomic_set(&uv_in_nmi, 0);
+ atomic_set(&uv_nmi_kexec_failed, 0);
}

uv_nmi_touch_watchdogs();
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/