[PATCH 1/4] softirq: implement IRQ flood detection mechanism

From: Ming Lei
Date: Tue Aug 27 2019 - 04:54:13 EST


For some high-performance IO devices, interrupts may arrive very frequently,
while IO request completion may take a bit of time. Especially on some
devices (SCSI or NVMe), IO requests can be submitted concurrently from
multiple CPU cores, while IO completion is done on only one of
these submission CPU cores.

Then an IRQ flood can easily be triggered, causing a CPU lockup.

Implement a simple generic CPU IRQ flood detection mechanism. This
mechanism uses the average CPU interrupt interval to evaluate whether an
IRQ flood is occurring. An Exponentially Weighted Moving Average (EWMA) is
used to compute the average CPU interrupt interval.

Cc: Long Li <longli@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Keith Busch <keith.busch@xxxxxxxxx>
Cc: Jens Axboe <axboe@xxxxxx>
Cc: Christoph Hellwig <hch@xxxxxx>
Cc: Sagi Grimberg <sagi@xxxxxxxxxxx>
Cc: John Garry <john.garry@xxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Hannes Reinecke <hare@xxxxxxxx>
Cc: linux-nvme@xxxxxxxxxxxxxxxxxxx
Cc: linux-scsi@xxxxxxxxxxxxxxx
Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx>
---
drivers/base/cpu.c | 25 ++++++++++++++++++++++
include/linux/hardirq.h | 2 ++
kernel/softirq.c | 46 +++++++++++++++++++++++++++++++++++++++++
3 files changed, 73 insertions(+)

diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index cc37511de866..7277d1aa0906 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -20,6 +20,7 @@
#include <linux/tick.h>
#include <linux/pm_qos.h>
#include <linux/sched/isolation.h>
+#include <linux/hardirq.h>

#include "base.h"

@@ -183,10 +184,33 @@ static struct attribute_group crash_note_cpu_attr_group = {
};
#endif

+static ssize_t show_irq_interval(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, dev);
+ ssize_t rc;
+ int cpunum;
+
+ cpunum = cpu->dev.id;
+
+ rc = sprintf(buf, "%llu\n", irq_get_avg_interval(cpunum));
+ return rc;
+}
+
+static DEVICE_ATTR(irq_interval, 0400, show_irq_interval, NULL);
+static struct attribute *irq_interval_cpu_attrs[] = {
+ &dev_attr_irq_interval.attr,
+ NULL
+};
+static struct attribute_group irq_interval_cpu_attr_group = {
+ .attrs = irq_interval_cpu_attrs,
+};
+
static const struct attribute_group *common_cpu_attr_groups[] = {
#ifdef CONFIG_KEXEC
&crash_note_cpu_attr_group,
#endif
+ &irq_interval_cpu_attr_group,
NULL
};

@@ -194,6 +218,7 @@ static const struct attribute_group *hotplugable_cpu_attr_groups[] = {
#ifdef CONFIG_KEXEC
&crash_note_cpu_attr_group,
#endif
+ &irq_interval_cpu_attr_group,
NULL
};

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index da0af631ded5..fd394060ddb3 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -8,6 +8,8 @@
#include <linux/vtime.h>
#include <asm/hardirq.h>

+extern u64 irq_get_avg_interval(int cpu);
+extern bool irq_flood_detected(void);

extern void synchronize_irq(unsigned int irq);
extern bool synchronize_hardirq(unsigned int irq);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 0427a86743a4..96e01669a2e0 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -25,6 +25,7 @@
#include <linux/smpboot.h>
#include <linux/tick.h>
#include <linux/irq.h>
+#include <linux/sched/clock.h>

#define CREATE_TRACE_POINTS
#include <trace/events/irq.h>
@@ -52,6 +53,12 @@ DEFINE_PER_CPU_ALIGNED(irq_cpustat_t, irq_stat);
EXPORT_PER_CPU_SYMBOL(irq_stat);
#endif

+struct irq_interval {
+ u64 last_irq_end;
+ u64 avg;
+};
+DEFINE_PER_CPU(struct irq_interval, avg_irq_interval);
+
static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;

DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
@@ -339,6 +346,41 @@ asmlinkage __visible void do_softirq(void)
local_irq_restore(flags);
}

+/*
+ * Update average irq interval with the Exponential Weighted Moving
+ * Average(EWMA)
+ */
+static void irq_update_interval(void)
+{
+#define IRQ_INTERVAL_EWMA_WEIGHT 128
+#define IRQ_INTERVAL_EWMA_PREV_FACTOR 127
+#define IRQ_INTERVAL_EWMA_CURR_FACTOR (IRQ_INTERVAL_EWMA_WEIGHT - \
+ IRQ_INTERVAL_EWMA_PREV_FACTOR)
+
+ int cpu = raw_smp_processor_id();
+ struct irq_interval *inter = per_cpu_ptr(&avg_irq_interval, cpu);
+ u64 delta = sched_clock_cpu(cpu) - inter->last_irq_end;
+
+ inter->avg = (inter->avg * IRQ_INTERVAL_EWMA_PREV_FACTOR +
+ delta * IRQ_INTERVAL_EWMA_CURR_FACTOR) /
+ IRQ_INTERVAL_EWMA_WEIGHT;
+}
+
+u64 irq_get_avg_interval(int cpu)
+{
+ return per_cpu_ptr(&avg_irq_interval, cpu)->avg;
+}
+
+/*
+ * If the average CPU irq interval is less than 8us, we think interrupt
+ * flood is detected on this CPU
+ */
+bool irq_flood_detected(void)
+{
+#define IRQ_FLOOD_THRESHOLD_NS 8000
+ return raw_cpu_ptr(&avg_irq_interval)->avg <= IRQ_FLOOD_THRESHOLD_NS;
+}
+
/*
* Enter an interrupt context.
*/
@@ -356,6 +398,7 @@ void irq_enter(void)
}

__irq_enter();
+ irq_update_interval();
}

static inline void invoke_softirq(void)
@@ -402,6 +445,8 @@ static inline void tick_irq_exit(void)
*/
void irq_exit(void)
{
+ struct irq_interval *inter = raw_cpu_ptr(&avg_irq_interval);
+
#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
local_irq_disable();
#else
@@ -413,6 +458,7 @@ void irq_exit(void)
invoke_softirq();

tick_irq_exit();
+ inter->last_irq_end = sched_clock_cpu(smp_processor_id());
rcu_irq_exit();
trace_hardirq_exit(); /* must be last! */
}
--
2.20.1