[PATCH RFC 3/3] workqueue: dump the last woken worker for stalled pools
From: Breno Leitao
Date: Tue Jun 16 2026 - 12:52:15 EST
To identify the task most likely responsible for a stall, add a
last_woken_worker pointer (protected by pool->lock) to struct worker_pool
and record it in kick_pool() just before wake_up_process(). This captures
the idle worker that was kicked to take over when the last running worker
went to sleep; if the pool is now stuck with no running worker, that task
is the prime suspect and its backtrace is dumped by
show_pool_no_running_worker().
Using struct worker * rather than struct task_struct * avoids any
lifetime concern: workers are only destroyed via set_worker_dying()
which requires pool->lock, and set_worker_dying() clears
last_woken_worker when the dying worker matches.
show_cpu_pool_busy_workers() holds pool->lock while calling
show_pool_no_running_worker(), which in turn calls sched_show_task(), so
last_woken_worker is either NULL or points to a live worker with a valid
task. More precisely, set_worker_dying() clears last_woken_worker before
setting WORKER_DIE, so a non-NULL last_woken_worker means the kthread has
not yet exited and worker->task is still alive.
Suggested-by: Petr Mladek <pmladek@xxxxxxxx>
Signed-off-by: Breno Leitao <leitao@xxxxxxxxxx>
---
kernel/workqueue.c | 28 ++++++++++++++++++++++++++--
1 file changed, 26 insertions(+), 2 deletions(-)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index db6287cd39588..6870b765c9ac8 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -226,6 +226,7 @@ struct worker_pool {
/* L: hash of busy workers */
struct worker *manager; /* L: purely informational */
+ struct worker *last_woken_worker; /* L: last worker woken by kick_pool() */
struct list_head workers; /* A: attached workers */
struct ida worker_ida; /* worker IDs for task name */
@@ -1310,6 +1311,9 @@ static bool kick_pool(struct worker_pool *pool)
}
}
#endif
+ /* Track the last idle worker woken, used for stall diagnostics. */
+ pool->last_woken_worker = worker;
+
wake_up_process(p);
return true;
}
@@ -2948,6 +2952,13 @@ static void set_worker_dying(struct worker *worker, struct list_head *list)
pool->nr_workers--;
pool->nr_idle--;
+ /*
+ * Clear last_woken_worker if it points to this worker, so that
+ * show_cpu_pool_busy_workers() cannot dereference a freed worker.
+ */
+ if (pool->last_woken_worker == worker)
+ pool->last_woken_worker = NULL;
+
worker->flags |= WORKER_DIE;
list_move(&worker->entry, list);
@@ -7707,13 +7718,25 @@ static void show_pool_no_running_worker(struct worker_pool *pool)
idle_cpu(pool->cpu) ? "idle" : "busy",
pool->nr_workers, pool->nr_idle);
pr_info("The pool might have trouble waking an idle worker.\n");
+ /*
+ * last_woken_worker and its task are valid here: set_worker_dying()
+ * clears it under pool->lock before setting WORKER_DIE, so if
+ * last_woken_worker is non-NULL the kthread has not yet exited and
+ * worker->task is still alive.
+ */
+ if (pool->last_woken_worker) {
+ pr_info("Backtrace of last woken worker:\n");
+ sched_show_task(pool->last_woken_worker->task);
+ } else {
+ pr_info("Last woken worker empty\n");
+ }
printk_deferred_exit();
}
/*
* Show running workers that might prevent the processing of pending work items.
* If no running worker is found, the pool may be stuck waiting for an idle
- * worker to be woken, so report the pool state.
+ * worker to be woken, so report the pool state and the last woken worker.
*/
static void show_cpu_pool_busy_workers(struct worker_pool *pool)
{
@@ -7748,7 +7771,8 @@ static void show_cpu_pool_busy_workers(struct worker_pool *pool)
/*
* If no running worker was found, the pool is likely stuck. Print pool
- * state.
+ * state and the backtrace of the last woken worker, which is the prime
+ * suspect for the stall.
*/
if (!found_running)
show_pool_no_running_worker(pool);
--
2.53.0-Meta