[PATCH] x86/platform/uv/BAU: gracefully disable BAU during panic

From: Andrew Banman
Date: Mon Aug 13 2018 - 10:01:27 EST


When we panic while the BAU is active, outstanding broadcasts may go
un-acknowledged by the kernel. These broadcasts cause timeouts in the
UV ASIC that tie up BAU resources, which may cause a fatal error that
brings down the system, thereby crashing the kdump kexec.

Add uv_bau_crash_shutdown() to bring BAU to quiescence ahead of the crash
shutdown routine saved in smp machine_ops. Assign uv_bau_crash_shutdown
to machine_ops during init.

Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
Cc: Colin Ian King <colin.king@xxxxxxxxxxxxx>
Cc: Dimitri Sivanich <sivanich@xxxxxxx>
Cc: x86@xxxxxxxxxx
Cc: linux-kernel@xxxxxxxxxxxxxxx
Acked-by: Mike Travis <mike.travis@xxxxxxx>
Signed-off-by: Andrew Banman <abanman@xxxxxxx>
---
arch/x86/platform/uv/tlb_uv.c | 49 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 49 insertions(+)

diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index e26dfad..dae6b3c 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -22,8 +22,13 @@
#include <asm/tsc.h>
#include <asm/irq_vectors.h>
#include <asm/timer.h>
+#include <asm/reboot.h>

static struct bau_operations ops __ro_after_init;
+#ifdef CONFIG_KEXEC_CORE
+static void (*crash_shutdown)(struct pt_regs *regs) __ro_after_init; /* machine_ops.crash_shutdown saved by uv_bau_init() */
+static bool crash_in_progress; /* set on first entry to uv_bau_crash_shutdown() */
+#endif

/* timeouts in nanoseconds (indexed by UVH_AGING_PRESCALE_SEL urgency7 30:28) */
static const int timeout_base_ns[] = {
@@ -2195,6 +2200,44 @@ static int __init init_per_cpu(int nuvhubs, int base_part_pnode)
.wait_completion = uv4_wait_completion,
};

+#ifdef CONFIG_KEXEC_CORE
+/*
+ * Quiesce the BAU (abandon in-flight broadcasts, free resources firmware
+ * broadcasts need), then call the saved crash_shutdown handler with @regs.
+ */
+static void uv_bau_crash_shutdown(struct pt_regs *regs)
+{
+ int pnode = 0;
+ int uvhub = 0;
+
+ if (crash_in_progress)
+ return;
+ crash_in_progress = true;
+
+ /*
+ * Don't bother turning off BAU in the per-cpu structs. We free up
+ * enough INTD resources to accommodate any new broadcasts that
+ * may (however unlikely) start up before we complete the panic,
+ * without tying up FW-initiated General Broadcasts that must not
+ * time out.
+ */
+
+ for_each_possible_blade(uvhub) {
+ if (!uv_blade_nr_possible_cpus(uvhub))
+ continue;
+ pnode = uv_blade_to_pnode(uvhub);
+ /* Set STATUS registers to idle to free source cpus */
+ write_gmmr(pnode, UVH_LB_BAU_SB_ACTIVATION_STATUS_0, 0x0);
+ write_gmmr(pnode, UVH_LB_BAU_SB_ACTIVATION_STATUS_1, 0x0);
+ write_gmmr(pnode, UVH_LB_BAU_SB_ACTIVATION_STATUS_2, 0x0);
+ /* Clear TIMEOUT and PENDING bits to free up BAU resources */
+ ops.write_g_sw_ack(pnode, ops.read_g_sw_ack(pnode) & 0xFFFF);
+ }
+
+ crash_shutdown(regs);
+}
+#endif
+
/*
* Initialization of BAU-related structures
*/
@@ -2267,6 +2310,12 @@ static int __init uv_bau_init(void)
}
}

+#ifdef CONFIG_KEXEC_CORE
+ crash_shutdown = machine_ops.crash_shutdown;
+ machine_ops.crash_shutdown = uv_bau_crash_shutdown;
+ crash_in_progress = false;
+#endif
+
return 0;

err_bau_disable:
--
1.8.2.1