[PATCH v2 09/13] io_uring/rsrc: don't offload node free

From: Pavel Begunkov
Date: Tue Apr 04 2023 - 08:41:34 EST


struct delayed_work rsrc_put_work was previously used to offload node
freeing because io_rsrc_node_ref_zero() was previously called by RCU in
the IRQ context. Now, as percpu refcounting is gone, we can do it
eagerly at the spot without pushing it to a worker.

Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx>
---
include/linux/io_uring_types.h | 3 --
io_uring/io_uring.c | 6 ----
io_uring/rsrc.c | 59 +++-------------------------------
3 files changed, 4 insertions(+), 64 deletions(-)

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 9492889f00c0..47496059e13a 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -330,9 +330,6 @@ struct io_ring_ctx {
struct io_rsrc_data *file_data;
struct io_rsrc_data *buf_data;

- struct delayed_work rsrc_put_work;
- struct callback_head rsrc_put_tw;
- struct llist_head rsrc_put_llist;
/* protected by ->uring_lock */
struct list_head rsrc_ref_list;

diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 764df5694d73..d6a0025afc31 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -326,9 +326,6 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
INIT_LIST_HEAD(&ctx->timeout_list);
INIT_LIST_HEAD(&ctx->ltimeout_list);
INIT_LIST_HEAD(&ctx->rsrc_ref_list);
- INIT_DELAYED_WORK(&ctx->rsrc_put_work, io_rsrc_put_work);
- init_task_work(&ctx->rsrc_put_tw, io_rsrc_put_tw);
- init_llist_head(&ctx->rsrc_put_llist);
init_llist_head(&ctx->work_llist);
INIT_LIST_HEAD(&ctx->tctx_list);
ctx->submit_state.free_list.next = NULL;
@@ -2821,11 +2818,8 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
io_rsrc_node_destroy(ctx->rsrc_node);
if (ctx->rsrc_backup_node)
io_rsrc_node_destroy(ctx->rsrc_backup_node);
- flush_delayed_work(&ctx->rsrc_put_work);
- flush_delayed_work(&ctx->fallback_work);

WARN_ON_ONCE(!list_empty(&ctx->rsrc_ref_list));
- WARN_ON_ONCE(!llist_empty(&ctx->rsrc_put_llist));

#if defined(CONFIG_UNIX)
if (ctx->ring_sock) {
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 9647c02be0dc..77cb2f8cfd68 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -145,15 +145,8 @@ static void io_rsrc_put_work_one(struct io_rsrc_data *rsrc_data,
{
struct io_ring_ctx *ctx = rsrc_data->ctx;

- if (prsrc->tag) {
- if (ctx->flags & IORING_SETUP_IOPOLL) {
- mutex_lock(&ctx->uring_lock);
- io_post_aux_cqe(ctx, prsrc->tag, 0, 0);
- mutex_unlock(&ctx->uring_lock);
- } else {
- io_post_aux_cqe(ctx, prsrc->tag, 0, 0);
- }
- }
+ if (prsrc->tag)
+ io_post_aux_cqe(ctx, prsrc->tag, 0, 0);
rsrc_data->do_put(ctx, prsrc);
}

@@ -176,32 +169,6 @@ static void __io_rsrc_put_work(struct io_rsrc_node *ref_node)
complete(&rsrc_data->done);
}

-void io_rsrc_put_work(struct work_struct *work)
-{
- struct io_ring_ctx *ctx;
- struct llist_node *node;
-
- ctx = container_of(work, struct io_ring_ctx, rsrc_put_work.work);
- node = llist_del_all(&ctx->rsrc_put_llist);
-
- while (node) {
- struct io_rsrc_node *ref_node;
- struct llist_node *next = node->next;
-
- ref_node = llist_entry(node, struct io_rsrc_node, llist);
- __io_rsrc_put_work(ref_node);
- node = next;
- }
-}
-
-void io_rsrc_put_tw(struct callback_head *cb)
-{
- struct io_ring_ctx *ctx = container_of(cb, struct io_ring_ctx,
- rsrc_put_tw);
-
- io_rsrc_put_work(&ctx->rsrc_put_work.work);
-}
-
void io_wait_rsrc_data(struct io_rsrc_data *data)
{
if (data && !atomic_dec_and_test(&data->refs))
@@ -217,34 +184,18 @@ void io_rsrc_node_ref_zero(struct io_rsrc_node *node)
__must_hold(&node->rsrc_data->ctx->uring_lock)
{
struct io_ring_ctx *ctx = node->rsrc_data->ctx;
- bool first_add = false;
- unsigned long delay = HZ;

node->done = true;
-
- /* if we are mid-quiesce then do not delay */
- if (node->rsrc_data->quiesce)
- delay = 0;
-
while (!list_empty(&ctx->rsrc_ref_list)) {
node = list_first_entry(&ctx->rsrc_ref_list,
struct io_rsrc_node, node);
/* recycle ref nodes in order */
if (!node->done)
break;
- list_del(&node->node);
- first_add |= llist_add(&node->llist, &ctx->rsrc_put_llist);
- }

- if (!first_add)
- return;
-
- if (ctx->submitter_task) {
- if (!task_work_add(ctx->submitter_task, &ctx->rsrc_put_tw,
- ctx->notify_method))
- return;
+ list_del(&node->node);
+ __io_rsrc_put_work(node);
}
- mod_delayed_work(system_wq, &ctx->rsrc_put_work, delay);
}

static struct io_rsrc_node *io_rsrc_node_alloc(void)
@@ -320,13 +271,11 @@ __cold static int io_rsrc_ref_quiesce(struct io_rsrc_data *data,
if (ret < 0) {
atomic_inc(&data->refs);
/* wait for all works potentially completing data->done */
- flush_delayed_work(&ctx->rsrc_put_work);
reinit_completion(&data->done);
mutex_lock(&ctx->uring_lock);
break;
}

- flush_delayed_work(&ctx->rsrc_put_work);
ret = wait_for_completion_interruptible(&data->done);
if (!ret) {
mutex_lock(&ctx->uring_lock);
--
2.39.1