[PATCH 2/3] kernel/watchdog: suppress max irq when irq floods

From: Pingfan Liu
Date: Thu Oct 22 2020 - 01:56:46 EST


The capture kernel should try its best to save the crash info. Normally, an
irq flood is caused by some trivial device whose interrupt has no impact on
saving the vmcore.

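Introduce a parameter "irqflood_suppress" to enable suppression of an irq
flood: when the watchdog detects a flood, the irq with the highest count on
the current CPU is disabled.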

Signed-off-by: Pingfan Liu <kernelfans@xxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Jisheng Zhang <Jisheng.Zhang@xxxxxxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: "Guilherme G. Piccoli" <gpiccoli@xxxxxxxxxxxxx>
Cc: Petr Mladek <pmladek@xxxxxxxx>
Cc: Marc Zyngier <maz@xxxxxxxxxx>
Cc: Linus Walleij <linus.walleij@xxxxxxxxxx>
Cc: afzal mohammed <afzal.mohd.ma@xxxxxxxxx>
Cc: Lina Iyer <ilina@xxxxxxxxxxxxxx>
Cc: "Gustavo A. R. Silva" <gustavo@xxxxxxxxxxxxxx>
Cc: Maulik Shah <mkshah@xxxxxxxxxxxxxx>
Cc: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
Cc: Jonathan Corbet <corbet@xxxxxxx>
Cc: Pawan Gupta <pawan.kumar.gupta@xxxxxxxxxxxxxxx>
Cc: Mike Kravetz <mike.kravetz@xxxxxxxxxx>
Cc: Oliver Neukum <oneukum@xxxxxxxx>
To: linux-kernel@xxxxxxxxxxxxxxx
Cc: linux-doc@xxxxxxxxxxxxxxx
Cc: kexec@xxxxxxxxxxxxxxxxxxx
---
include/linux/irq.h | 2 ++
kernel/irq/spurious.c | 32 ++++++++++++++++++++++++++++++++
kernel/watchdog.c | 9 ++++++++-
3 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 1b7f4df..140cb61 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -684,6 +684,8 @@ extern void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret);
/* Enable/disable irq debugging output: */
extern int noirqdebug_setup(char *str);

+void suppress_max_irq(void);
+
/* Checks whether the interrupt can be requested by request_irq(): */
extern int can_request_irq(unsigned int irq, unsigned long irqflags);

diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index f865e5f..d3d94d6 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -464,3 +464,35 @@ static int __init irqpoll_setup(char *str)
}

__setup("irqpoll", irqpoll_setup);
+
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+
+/* True once "irqflood_suppress" has been seen on the kernel command line. */
+static bool irqflood_suppress;
+
+/*
+ * Handler for the "irqflood_suppress" boot parameter.
+ *
+ * Logs that automatic suppression is on and sets the flag that
+ * suppress_max_irq() checks before doing any work. @str is not examined.
+ */
+static int __init irqflood_suppress_setup(char *str)
+{
+	pr_info("enable auto suppress irqflood\n");
+	irqflood_suppress = true;
+
+	/* NOTE(review): 1 presumably marks the option as consumed - confirm. */
+	return 1;
+}
+__setup("irqflood_suppress", irqflood_suppress_setup);
+
+/*
+ * suppress_max_irq - disable the irq that has fired most often on this CPU
+ *
+ * Does nothing unless the "irqflood_suppress" boot parameter was given.
+ * Otherwise it walks every active irq, picks the one with the largest
+ * kstat_irqs_cpu() count for the current CPU, logs it and disables it with
+ * disable_irq_nosync(). If no irq has a non-zero count, nothing is disabled.
+ *
+ * NOTE(review): the counts look cumulative rather than limited to the flood
+ * window, so a busy but harmless irq could be picked - confirm.
+ */
+void suppress_max_irq(void)
+{
+	unsigned int tmp, maxirq = 0, max = 0;
+	int irq, cpu;
+
+	if (!irqflood_suppress)
+		return;
+
+	/* Read the CPU id once instead of once per scanned irq. */
+	cpu = smp_processor_id();
+	for_each_active_irq(irq) {
+		tmp = kstat_irqs_cpu(irq, cpu);
+		if (max < tmp) {
+			maxirq = irq;
+			max = tmp;
+		}
+	}
+
+	/*
+	 * No irq was counted on this CPU: maxirq still holds its initial 0,
+	 * and disabling irq 0 would hit a line that did not cause the flood.
+	 */
+	if (!max)
+		return;
+
+	pr_warn("Suppress irq:%u, which is triggered %u times\n",
+		maxirq, max);
+	disable_irq_nosync(maxirq);
+}
+#endif
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 230ac38..28a74e5 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -24,6 +24,7 @@
#include <linux/sched/isolation.h>
#include <linux/stop_machine.h>
#include <linux/kernel_stat.h>
+#include <linux/irq.h>

#include <asm/irq_regs.h>
#include <linux/kvm_para.h>
@@ -364,9 +365,15 @@ static void check_irq_flood(void)
percent = irqts * 100 / totalts;
percent = percent < 100 ? percent : 100;
__this_cpu_write(check_hint, -1);
- if (percent >= 98)
+ if (percent >= 98) {
pr_info("Irq flood occupies more than %lu%% of the past %lu seconds\n",
percent, totalts >> 30);
+ /*
+ * Suppress top irq when scheduler does not work for long time and irq
+ * occupies too much time.
+ */
+ suppress_max_irq();
+ }
} else if (cnt == 0) {
__this_cpu_write(last_total_ts, totalts);
__this_cpu_write(last_irq_ts, irqts);
--
2.7.5