[PATCH v1 1/2] io-wq: add exit-on-idle mode

From: Li Chen

Date: Mon Feb 02 2026 - 09:38:30 EST


io-wq uses an idle timeout to shrink the pool, but keeps the last worker
around indefinitely to avoid churn.

For tasks that use io_uring for file I/O and then stop using it, this can
leave an iou-wrk-* thread behind even after all io_uring instances are
gone. This adds unnecessary overhead and also gets in the way of process
checkpoint/restore.

Add an exit-on-idle mode that makes all io-wq workers exit as soon as they
become idle, and provide io_wq_set_exit_on_idle() to toggle it.

Signed-off-by: Li Chen <me@linux.beauty>
---
io_uring/io-wq.c | 31 +++++++++++++++++++++++++++++++
io_uring/io-wq.h | 1 +
2 files changed, 32 insertions(+)

diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c
index 5d0928f37471..97e7eb847c6e 100644
--- a/io_uring/io-wq.c
+++ b/io_uring/io-wq.c
@@ -35,6 +35,7 @@ enum {

enum {
IO_WQ_BIT_EXIT = 0, /* wq exiting */
+ IO_WQ_BIT_EXIT_ON_IDLE = 1, /* allow all workers to exit on idle */
};

enum {
@@ -655,6 +656,18 @@ static int io_wq_worker(void *data)
io_worker_handle_work(acct, worker);

raw_spin_lock(&wq->lock);
+ /*
+ * If wq is marked idle-exit, drop this worker as soon as it
+ * becomes idle. This is used to avoid keeping io-wq worker
+ * threads around for tasks that no longer have any active
+ * io_uring instances.
+ */
+ if (test_bit(IO_WQ_BIT_EXIT_ON_IDLE, &wq->state)) {
+ acct->nr_workers--;
+ raw_spin_unlock(&wq->lock);
+ __set_current_state(TASK_RUNNING);
+ break;
+ }
/*
* Last sleep timed out. Exit if we're not the last worker,
* or if someone modified our affinity.
@@ -894,6 +907,24 @@ static bool io_wq_worker_wake(struct io_worker *worker, void *data)
return false;
}

+void io_wq_set_exit_on_idle(struct io_wq *wq, bool enable)
+{
+ if (!wq->task)
+ return;
+
+ if (!enable) {
+ clear_bit(IO_WQ_BIT_EXIT_ON_IDLE, &wq->state);
+ return;
+ }
+
+ if (test_and_set_bit(IO_WQ_BIT_EXIT_ON_IDLE, &wq->state))
+ return;
+
+ rcu_read_lock();
+ io_wq_for_each_worker(wq, io_wq_worker_wake, NULL);
+ rcu_read_unlock();
+}
+
static void io_run_cancel(struct io_wq_work *work, struct io_wq *wq)
{
do {
diff --git a/io_uring/io-wq.h b/io_uring/io-wq.h
index b3b004a7b625..f7f17a23693e 100644
--- a/io_uring/io-wq.h
+++ b/io_uring/io-wq.h
@@ -46,6 +46,7 @@ struct io_wq_data {
struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data);
void io_wq_exit_start(struct io_wq *wq);
void io_wq_put_and_exit(struct io_wq *wq);
+void io_wq_set_exit_on_idle(struct io_wq *wq, bool enable);

void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
void io_wq_hash_work(struct io_wq_work *work, void *val);
--
2.52.0