[PATCH 3/6] genirq: soft_moderation: activate hooks in handle_irq_event()

From: Luigi Rizzo

Date: Wed Nov 12 2025 - 14:24:29 EST


Activate soft_moderation via the hooks in handle_irq_event()
and per-CPU and irq_desc initialization.

This change only implements fixed moderation. It needs to be
explicitly enabled at runtime on individual interrupts.

Example (kernel built with CONFIG_SOFT_IRQ_MODERATION=y)

# enable fixed moderation
echo "delay_us=400" > /proc/irq/soft_moderation

# enable on network interrupts (change name as appropriate)
echo on | tee /proc/irq/*/*eth*/../soft_moderation

# show it works by looking at counters in /proc/irq/soft_moderation
cat /proc/irq/soft_moderation

# Show runtime impact on ping times changing delay_us
ping -n -f -q -c 1000 ${some_nearby_host}
echo "delay_us=100" > /proc/irq/soft_moderation
ping -n -f -q -c 1000 ${some_nearby_host}

Configuration via module parameters (irq_moderation.${name}=${value}) or
echo "${name}=${value}" > /proc/irq/soft_moderation)

delay_us 0=off, range 1-500, default 100
how long an interrupt is disabled after it fires. Small values are
accumulated until they are large enough, e.g. 10us. As an example, a 2us value
means that the timer is set only every 5 interrupts.

timer_rounds 0-20, default 0
How many extra timer runs before re-enabling interrupts. This allows
reducing the number of MSI interrupts while keeping delay_us small.
This is similar to the "napi_defer_hard_irqs" option in NAPI, but with
some subtle differences (e.g. here the number of rounds is
deterministic, and interrupts are disabled at MSI level).

Change-Id: I47c5059ad537fcb9561f924620cf68e1d648aae6
---
arch/x86/kernel/cpu/common.c | 1 +
drivers/irqchip/irq-gic-v3.c | 2 ++
kernel/irq/handle.c | 3 +++
kernel/irq/irqdesc.c | 1 +
4 files changed, 7 insertions(+)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 02d97834a1d4d..1953419fde6ff 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -2440,6 +2440,7 @@ void cpu_init(void)

intel_posted_msi_init();
}
+ irq_moderation_percpu_init();

mmgrab(&init_mm);
cur->active_mm = &init_mm;
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 3de351e66ee84..902bcbf9d85d8 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -1226,6 +1226,8 @@ static void gic_cpu_sys_reg_init(void)
WARN_ON(gic_dist_security_disabled() != cpus_have_security_disabled);
}

+ irq_moderation_percpu_init();
+
/*
* Some firmwares hand over to the kernel with the BPR changed from
* its reset value (and with a value large enough to prevent
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index e103451243a0b..2cacceaaea9d0 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -12,6 +12,7 @@
#include <linux/random.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
+#include <linux/irq_moderation.h>
#include <linux/kernel_stat.h>

#include <asm/irq_regs.h>
@@ -254,9 +255,11 @@ irqreturn_t handle_irq_event(struct irq_desc *desc)
irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS);
raw_spin_unlock(&desc->lock);

+ irq_moderation_hook(desc); /* may disable irq so must run unlocked */
ret = handle_irq_event_percpu(desc);

raw_spin_lock(&desc->lock);
+ irq_moderation_epilogue(desc); /* start moderation timer if needed */
irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS);
return ret;
}
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index db714d3014b5f..e3efbecf5b937 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -134,6 +134,7 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node,
desc->tot_count = 0;
desc->name = NULL;
desc->owner = owner;
+ irq_moderation_init_fields(desc);
for_each_possible_cpu(cpu)
*per_cpu_ptr(desc->kstat_irqs, cpu) = (struct irqstat) { };
desc_smp_init(desc, node, affinity);
--
2.51.2.1041.gc1ab5b90ca-goog