[PATCH 5/5] io_uring: batch put_task_struct()

From: Pavel Begunkov
Date: Mon Jul 13 2020 - 19:44:10 EST


Each put_task_struct() is an atomic_dec. Do that in batches.

Tested with io_uring-bench (iopoll, QD=128) on a custom nullblk, where
the added ->iopoll() is not optimised at all:

before: 529504 IOPS
after: 538415 IOPS
diff: ~1.8%

Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx>
---
fs/io_uring.c | 28 ++++++++++++++++++++++++++--
1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6f767781351f..3216cc00061b 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1761,8 +1761,18 @@ static void io_free_req(struct io_kiocb *req)
struct req_batch {
void *reqs[IO_IOPOLL_BATCH];
int to_free;
+
+ struct task_struct *task; /* task the pending references belong to; NULL if none */
+ int task_refs; /* refs on ->task to drop via one put_task_struct_many() */
};

+static void io_init_req_batch(struct req_batch *rb)
+{
+ rb->to_free = 0; /* no requests queued in rb->reqs[] */
+ rb->task_refs = 0; /* no task references accumulated */
+ rb->task = NULL; /* no task being batched yet */
+}
+
static void __io_req_free_batch_flush(struct io_ring_ctx *ctx,
struct req_batch *rb)
{
@@ -1776,6 +1786,10 @@ static void io_req_free_batch_finish(struct io_ring_ctx *ctx,
{
if (rb->to_free)
__io_req_free_batch_flush(ctx, rb);
+ if (rb->task) {
+ put_task_struct_many(rb->task, rb->task_refs);
+ rb->task = NULL;
+ }
}

static void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req)
@@ -1787,6 +1801,16 @@ static void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req)
if (req->flags & REQ_F_LINK_HEAD)
io_queue_next(req);

+	if (req->flags & REQ_F_TASK_PINNED) {
+		if (rb->task && req->task != rb->task) /* flush refs of the previous task */
+			put_task_struct_many(rb->task, rb->task_refs);
+		if (req->task != rb->task) /* new task, incl. first one (rb->task == NULL) */
+			rb->task_refs = 0;
+		rb->task = req->task; /* must be set even on first use, else the refs leak */
+		rb->task_refs++;
+		req->flags &= ~REQ_F_TASK_PINNED;
+	}
+
io_dismantle_req(req);
rb->reqs[rb->to_free++] = req;
if (unlikely(rb->to_free == ARRAY_SIZE(rb->reqs)))
@@ -1809,7 +1833,7 @@ static void io_submit_flush_completions(struct io_comp_state *cs)
spin_unlock_irq(&ctx->completion_lock);
io_cqring_ev_posted(ctx);

- rb.to_free = 0;
+ io_init_req_batch(&rb);
for (i = 0; i < nr; ++i) {
req = cs->reqs[i];
if (refcount_dec_and_test(&req->refs))
@@ -1973,7 +1997,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
/* order with ->result store in io_complete_rw_iopoll() */
smp_rmb();

- rb.to_free = 0;
+ io_init_req_batch(&rb);
while (!list_empty(done)) {
int cflags = 0;

--
2.24.0