[PATCH] rcu: allow multiple stalls before panic

From: chao
Date: Mon Aug 31 2020 - 02:42:16 EST


Some stalls are transient and system can fully recover.
Allow users to configure the number of stalls experienced
to trigger kernel Panic.

Signed-off-by: chao <chao@xxxxxxxx>
---
include/linux/kernel.h | 1 +
kernel/rcu/tree_stall.h | 6 ++++++
kernel/sysctl.c | 11 +++++++++++
3 files changed, 18 insertions(+)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 500def6..fc2dd3f 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -536,6 +536,7 @@ extern int panic_on_warn;
extern unsigned long panic_on_taint;
extern bool panic_on_taint_nousertaint;
extern int sysctl_panic_on_rcu_stall;
+extern int sysctl_max_rcu_stall_to_panic;
extern int sysctl_panic_on_stackoverflow;

extern bool crash_kexec_post_notifiers;
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index b5d3b47..2047423 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -13,6 +13,7 @@

/* panic() on RCU Stall sysctl. */
int sysctl_panic_on_rcu_stall __read_mostly;
+int sysctl_max_rcu_stall_to_panic __read_mostly;

#ifdef CONFIG_PROVE_RCU
#define RCU_STALL_DELAY_DELTA (5 * HZ)
@@ -106,6 +107,11 @@ early_initcall(check_cpu_stall_init);
/* If so specified via sysctl, panic, yielding cleaner stall-warning output. */
static void panic_on_rcu_stall(void)
{
+ static int cpu_stall;
+
+ if (++cpu_stall < sysctl_max_rcu_stall_to_panic)
+ return;
+
if (sysctl_panic_on_rcu_stall)
panic("RCU Stall\n");
}
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 287862f..1bca490 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2651,6 +2651,17 @@ static struct ctl_table kern_table[] = {
.extra2 = SYSCTL_ONE,
},
#endif
+#if defined(CONFIG_TREE_RCU)
+ {
+ .procname = "max_rcu_stall_to_panic",
+ .data = &sysctl_max_rcu_stall_to_panic,
+ .maxlen = sizeof(sysctl_max_rcu_stall_to_panic),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ONE,
+ .extra2 = SYSCTL_INT_MAX,
+ },
+#endif
#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
{
.procname = "stack_erasing",
--
2.7.4