[PATCH 5.5 354/367] io_uring: enable option to only trigger eventfd for async completions

From: Greg Kroah-Hartman
Date: Mon Feb 10 2020 - 07:47:27 EST


[ Upstream commit f2842ab5b72d7ee5f7f8385c2d4f32c133f5837b ]

If an application is using eventfd notifications with poll to know when
new SQEs can be issued, it's expecting the following read/writes to
complete inline. And with that, it knows that there are events available,
and don't want spurious wakeups on the eventfd for those requests.

This adds IORING_REGISTER_EVENTFD_ASYNC, which works just like
IORING_REGISTER_EVENTFD, except it only triggers notifications for events
that happen from async completions (IRQ, or io-wq worker completions).
Any completions inline from the submission itself will not trigger
notifications.

Suggested-by: Mark Papadakis <markuspapadakis@xxxxxxxxxx>
Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx>
---
fs/io_uring.c | 17 ++++++++++++++++-
include/uapi/linux/io_uring.h | 1 +
2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 95fc5c5a85968..131087782bec9 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -188,6 +188,7 @@ struct io_ring_ctx {
bool account_mem;
bool cq_overflow_flushed;
bool drain_next;
+ bool eventfd_async;

/*
* Ring buffer of indices into array of io_uring_sqe, which is
@@ -735,13 +736,20 @@ static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx)
return &rings->cqes[tail & ctx->cq_mask];
}

+static inline bool io_should_trigger_evfd(struct io_ring_ctx *ctx)
+{
+ if (!ctx->eventfd_async)
+ return true;
+ return io_wq_current_is_worker() || in_interrupt();
+}
+
static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
{
if (waitqueue_active(&ctx->wait))
wake_up(&ctx->wait);
if (waitqueue_active(&ctx->sqo_wait))
wake_up(&ctx->sqo_wait);
- if (ctx->cq_ev_fd)
+ if (ctx->cq_ev_fd && io_should_trigger_evfd(ctx))
eventfd_signal(ctx->cq_ev_fd, 1);
}

@@ -5486,10 +5494,17 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
ret = io_sqe_files_update(ctx, arg, nr_args);
break;
case IORING_REGISTER_EVENTFD:
+ case IORING_REGISTER_EVENTFD_ASYNC:
ret = -EINVAL;
if (nr_args != 1)
break;
ret = io_eventfd_register(ctx, arg);
+ if (ret)
+ break;
+ if (opcode == IORING_REGISTER_EVENTFD_ASYNC)
+ ctx->eventfd_async = 1;
+ else
+ ctx->eventfd_async = 0;
break;
case IORING_UNREGISTER_EVENTFD:
ret = -EINVAL;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 55cfcb71606db..88693fed2c4b4 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -175,6 +175,7 @@ struct io_uring_params {
#define IORING_REGISTER_EVENTFD 4
#define IORING_UNREGISTER_EVENTFD 5
#define IORING_REGISTER_FILES_UPDATE 6
+#define IORING_REGISTER_EVENTFD_ASYNC 7

struct io_uring_files_update {
__u32 offset;
--
2.20.1