[RFC PATCH 18/23] watchdog/hardlockup/hpet: Add the NMI watchdog operations

From: Ricardo Neri
Date: Tue Jun 12 2018 - 21:03:26 EST


Implement the enable, disable and stop operations of the HPET-based NMI
watchdog. Given that a single timer is used to monitor all the CPUs in
the system, it is necessary to define a cpumask that keeps track of the
CPUs that can be monitored. This cpumask is protected with a spin lock.

As individual CPUs are put online and offline, this cpumask is updated.
CPUs are unconditionally cleared from the mask when going offline. When
going online, the CPU is set in the mask only if it is one of the CPUs allowed
to be monitored by the watchdog.

It is not necessary to implement a start function. The NMI watchdog will
be enabled when there is at least one CPU to monitor.

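The disable function clears the calling CPU from the mask and disables the
timer once no CPUs remain to be monitored. The stop function clears the
whole CPU mask and disables the timer.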

Cc: Ashok Raj <ashok.raj@xxxxxxxxx>
Cc: Andi Kleen <andi.kleen@xxxxxxxxx>
Cc: Tony Luck <tony.luck@xxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxx>
Cc: Jacob Pan <jacob.jun.pan@xxxxxxxxx>
Cc: "Rafael J. Wysocki" <rafael.j.wysocki@xxxxxxxxx>
Cc: Don Zickus <dzickus@xxxxxxxxxx>
Cc: Nicholas Piggin <npiggin@xxxxxxxxx>
Cc: Michael Ellerman <mpe@xxxxxxxxxxxxxx>
Cc: Frederic Weisbecker <frederic@xxxxxxxxxx>
Cc: Alexei Starovoitov <ast@xxxxxxxxxx>
Cc: Babu Moger <babu.moger@xxxxxxxxxx>
Cc: Mathieu Desnoyers <mathieu.desnoyers@xxxxxxxxxxxx>
Cc: Masami Hiramatsu <mhiramat@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Philippe Ombredanne <pombredanne@xxxxxxxx>
Cc: Colin Ian King <colin.king@xxxxxxxxxxxxx>
Cc: Byungchul Park <byungchul.park@xxxxxxx>
Cc: "Paul E. McKenney" <paulmck@xxxxxxxxxxxxxxxxxx>
Cc: "Luis R. Rodriguez" <mcgrof@xxxxxxxxxx>
Cc: Waiman Long <longman@xxxxxxxxxx>
Cc: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
Cc: Randy Dunlap <rdunlap@xxxxxxxxxxxxx>
Cc: Davidlohr Bueso <dave@xxxxxxxxxxxx>
Cc: Christoffer Dall <cdall@xxxxxxxxxx>
Cc: Marc Zyngier <marc.zyngier@xxxxxxx>
Cc: Kai-Heng Feng <kai.heng.feng@xxxxxxxxxxxxx>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
Cc: David Rientjes <rientjes@xxxxxxxxxx>
Cc: "Ravi V. Shankar" <ravi.v.shankar@xxxxxxxxx>
Cc: x86@xxxxxxxxxx
Cc: iommu@xxxxxxxxxxxxxxxxxxxxxxxxxx
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@xxxxxxxxxxxxxxx>
---
arch/x86/include/asm/hpet.h | 2 +
include/linux/nmi.h | 1 +
kernel/watchdog_hld_hpet.c | 98 +++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 101 insertions(+)

diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index 33309b7..6ace2d1 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -124,6 +124,8 @@ struct hpet_hld_data {
u32 irq;
u32 flags;
u64 ticks_per_second;
+ struct cpumask monitored_mask;
+ spinlock_t lock; /* serialized access to monitored_mask */
};

extern struct hpet_hld_data *hpet_hardlockup_detector_assign_timer(void);
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index e608762..23e20d2 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -129,6 +129,7 @@ struct nmi_watchdog_ops {
};

extern struct nmi_watchdog_ops hardlockup_detector_perf_ops;
+extern struct nmi_watchdog_ops hardlockup_detector_hpet_ops;

void watchdog_nmi_stop(void);
void watchdog_nmi_start(void);
diff --git a/kernel/watchdog_hld_hpet.c b/kernel/watchdog_hld_hpet.c
index 3bedffa..857e051 100644
--- a/kernel/watchdog_hld_hpet.c
+++ b/kernel/watchdog_hld_hpet.c
@@ -345,6 +345,91 @@ static int setup_hpet_irq(struct hpet_hld_data *hdata)
}

/**
+ * hardlockup_detector_hpet_enable() - Enable the hardlockup detector
+ *
+ * The hardlockup detector is enabled for the CPU that executes the
+ * function. It is only enabled if such CPU is allowed to be monitored
+ * by the lockup detector. Nothing is done if the detector was not
+ * initialized (hld_data is NULL).
+ *
+ * The first CPU to be added to the monitored mask also becomes the target
+ * of the timer interrupt and starts the timer. If the interrupt cannot be
+ * moved to that CPU, the CPU is removed from the mask again so that a
+ * later caller is still treated as the first monitored CPU.
+ *
+ * Returns:
+ *
+ * None
+ *
+ */
+static void hardlockup_detector_hpet_enable(void)
+{
+	struct cpumask *allowed = watchdog_get_allowed_cpumask();
+	unsigned int cpu = smp_processor_id();
+
+	if (!hld_data)
+		return;
+
+	if (!cpumask_test_cpu(cpu, allowed))
+		return;
+
+	spin_lock(&hld_data->lock);
+
+	cpumask_set_cpu(cpu, &hld_data->monitored_mask);
+
+	/*
+	 * If this is the first CPU to be monitored, set everything in motion:
+	 * move the interrupt to this CPU, kick and enable the timer.
+	 */
+	if (cpumask_weight(&hld_data->monitored_mask) == 1) {
+		if (irq_set_affinity(hld_data->irq, cpumask_of(cpu))) {
+			/*
+			 * The timer was not started. Undo the mask update;
+			 * otherwise the next CPU would see a weight of 2,
+			 * skip this branch and the timer would never run.
+			 */
+			cpumask_clear_cpu(cpu, &hld_data->monitored_mask);
+			spin_unlock(&hld_data->lock);
+			pr_err("Unable to enable on CPU %d.!\n", cpu);
+			return;
+		}
+
+		kick_timer(hld_data);
+		enable(hld_data);
+	}
+
+	spin_unlock(&hld_data->lock);
+}
+
+/**
+ * hardlockup_detector_hpet_disable() - Disable the hardlockup detector
+ *
+ * Remove the CPU that executes the function from the mask of monitored
+ * CPUs. The timer is disabled once that mask becomes empty. Nothing is
+ * done if the detector was not initialized (hld_data is NULL).
+ *
+ * Returns:
+ *
+ * None
+ */
+static void hardlockup_detector_hpet_disable(void)
+{
+	struct cpumask *monitored;
+
+	if (!hld_data)
+		return;
+
+	monitored = &hld_data->monitored_mask;
+
+	spin_lock(&hld_data->lock);
+
+	cpumask_clear_cpu(smp_processor_id(), monitored);
+
+	/* Keep the timer running while any CPU is still being monitored. */
+	if (cpumask_weight(monitored) == 0)
+		disable(hld_data);
+
+	spin_unlock(&hld_data->lock);
+}
+
+/**
+ * hardlockup_detector_hpet_stop() - Stop the NMI watchdog on all CPUs
+ *
+ * Disable the timer and clear the mask of monitored CPUs. Nothing is done
+ * if the detector was not initialized (hld_data is NULL).
+ *
+ * Returns:
+ *
+ * None
+ */
+static void hardlockup_detector_hpet_stop(void)
+{
+	/* Same guard as the enable and disable operations. */
+	if (!hld_data)
+		return;
+
+	/*
+	 * Disable the timer and empty the mask under the lock so that a
+	 * concurrent enable operation cannot re-arm the timer in between
+	 * and leave it running with an empty mask.
+	 */
+	spin_lock(&hld_data->lock);
+	disable(hld_data);
+	cpumask_clear(&hld_data->monitored_mask);
+	spin_unlock(&hld_data->lock);
+}
+
+/**
* hardlockup_detector_hpet_init() - Initialize the hardlockup detector
*
* Only initialize and configure the detector if an HPET is available on the
@@ -383,5 +468,18 @@ static int __init hardlockup_detector_hpet_init(void)
*/
disable(hld_data);

+ spin_lock_init(&hld_data->lock);
+
+ spin_lock(&hld_data->lock);
+ cpumask_clear(&hld_data->monitored_mask);
+ spin_unlock(&hld_data->lock);
+
return 0;
}
+
+/* NMI watchdog operations implemented by the HPET-based hardlockup detector. */
+struct nmi_watchdog_ops hardlockup_detector_hpet_ops = {
+	.init		= hardlockup_detector_hpet_init,
+	.enable		= hardlockup_detector_hpet_enable,
+	.disable	= hardlockup_detector_hpet_disable,
+	.stop		= hardlockup_detector_hpet_stop,
+};
--
2.7.4