[PATCH net v2] net: napi: Avoid gro timer misfiring at end of busypoll

From: Dragos Tatulea

Date: Wed May 06 2026 - 05:13:32 EST


When in irq deferral mode (defer-hard-irqs > 0), a gro-flush timer with
a short enough timeout can fire before NAPI_STATE_SCHED is cleared if
the last poll in busy_poll_stop() takes too long. This can leave the
queue stuck with interrupts disabled and no timer armed, which results
in a tx timeout if there is no subsequent busypoll cycle.

To prevent this, defer arming the gro-flush timer until after the last
poll, once NAPI_STATE_SCHED has been cleared.

Fixes: 7fd3253a7de6 ("net: Introduce preferred busy-polling")
Co-developed-by: Martin Karsten <mkarsten@xxxxxxxxxxxx>
Signed-off-by: Martin Karsten <mkarsten@xxxxxxxxxxxx>
Signed-off-by: Dragos Tatulea <dtatulea@xxxxxxxxxx>
Reviewed-by: Tariq Toukan <tariqt@xxxxxxxxxx>
Reviewed-by: Cosmin Ratiu <cratiu@xxxxxxxxxx>
---
Changes since RFC [1]:
- Sending only fix to net.
- Made commit message clearer and more succinct.
- Fixed timer arming to happen after clearing the NAPI_STATE_SCHED bit
- Arm timer after clearing NAPI_STATE_SCHED and drop IRQ disable.

[1] https://lore.kernel.org/all/20260428175134.1197036-3-dtatulea@xxxxxxxxxx/
---
net/core/dev.c | 21 ++++++++++++---------
1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 06c195906231..3ebd69988d51 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6862,9 +6862,9 @@ static void skb_defer_free_flush(void)

#if defined(CONFIG_NET_RX_BUSY_POLL)

-static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule)
+static void __busy_poll_stop(struct napi_struct *napi, unsigned long timeout)
{
- if (!skip_schedule) {
+ if (!timeout) {
gro_normal_list(&napi->gro);
__napi_schedule(napi);
return;
@@ -6874,6 +6874,8 @@ static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule)
gro_flush_normal(&napi->gro, HZ >= 1000);

clear_bit(NAPI_STATE_SCHED, &napi->state);
+ hrtimer_start(&napi->timer, ns_to_ktime(timeout),
+ HRTIMER_MODE_REL_PINNED);
}

enum {
@@ -6885,8 +6887,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock,
unsigned flags, u16 budget)
{
struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
- bool skip_schedule = false;
- unsigned long timeout;
+ unsigned long timeout = 0;
int rc;

/* Busy polling means there is a high chance device driver hard irq
@@ -6906,10 +6907,12 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock,

if (flags & NAPI_F_PREFER_BUSY_POLL) {
napi->defer_hard_irqs_count = napi_get_defer_hard_irqs(napi);
- timeout = napi_get_gro_flush_timeout(napi);
- if (napi->defer_hard_irqs_count && timeout) {
- hrtimer_start(&napi->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED);
- skip_schedule = true;
+ if (napi->defer_hard_irqs_count) {
+ /* A short enough gro flush timeout and long enough
+ * poll can result in timer firing too early.
+ * Timer will be armed later if necessary.
+ */
+ timeout = napi_get_gro_flush_timeout(napi);
}
}

@@ -6924,7 +6927,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock,
trace_napi_poll(napi, rc, budget);
netpoll_poll_unlock(have_poll_lock);
if (rc == budget)
- __busy_poll_stop(napi, skip_schedule);
+ __busy_poll_stop(napi, timeout);
bpf_net_ctx_clear(bpf_net_ctx);
local_bh_enable();
}
--
2.43.0