[PATCH v2 2/2] io_uring: add io-wq workqueue sharing

From: Pavel Begunkov
Date: Mon Jan 27 2020 - 19:16:49 EST


If IORING_SETUP_ATTACH_WQ is set, it expects wq_fd in io_uring_params to
be a valid io_uring fd io-wq of which will be shared with the newly
created io_uring instance. If the flag is set but it can't share io-wq,
it fails.

This allows creation of "sibling" io_urings, where we prefer to keep the
SQ/CQ private, but want to share the async backend to minimize the amount
of overhead associated with having multiple rings that belong to the same
backend.

Reported-by: Jens Axboe <axboe@xxxxxxxxx>
Reported-by: Daurnimator <quae@xxxxxxxxxxxxxxx>
Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx>
---
fs/io_uring.c | 68 +++++++++++++++++++++++++++--------
include/uapi/linux/io_uring.h | 4 ++-
2 files changed, 57 insertions(+), 15 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5ec6428933c3..de1cb3135721 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5692,11 +5692,56 @@ static void io_get_work(struct io_wq_work *work)
refcount_inc(&req->refs);
}

+static int io_init_wq_offload(struct io_ring_ctx *ctx,
+ struct io_uring_params *p)
+{
+ struct io_wq_data data;
+ struct fd f;
+ struct io_ring_ctx *ctx_attach;
+ unsigned int concurrency;
+ int ret = 0;
+
+ data.user = ctx->user;
+ data.get_work = io_get_work;
+ data.put_work = io_put_work;
+
+ if (!(p->flags & IORING_SETUP_ATTACH_WQ)) {
+ /* Do QD, or 4 * CPUS, whatever is smallest */
+ concurrency = min(ctx->sq_entries, 4 * num_online_cpus());
+
+ ctx->io_wq = io_wq_create(concurrency, &data);
+ if (IS_ERR(ctx->io_wq)) {
+ ret = PTR_ERR(ctx->io_wq);
+ ctx->io_wq = NULL;
+ }
+ return ret;
+ }
+
+ f = fdget(p->wq_fd);
+ if (!f.file)
+ return -EBADF;
+
+ if (f.file->f_op != &io_uring_fops) {
+ ret = -EOPNOTSUPP;
+ goto out_fput;
+ }
+
+ ctx_attach = f.file->private_data;
+ /* @io_wq is protected by holding the fd */
+ if (!io_wq_get(ctx_attach->io_wq, &data)) {
+ ret = -EINVAL;
+ goto out_fput;
+ }
+
+ ctx->io_wq = ctx_attach->io_wq;
+out_fput:
+ fdput(f);
+ return ret;
+}
+
static int io_sq_offload_start(struct io_ring_ctx *ctx,
struct io_uring_params *p)
{
- struct io_wq_data data;
- unsigned concurrency;
int ret;

init_waitqueue_head(&ctx->sqo_wait);
@@ -5740,18 +5785,9 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
goto err;
}

- data.user = ctx->user;
- data.get_work = io_get_work;
- data.put_work = io_put_work;
-
- /* Do QD, or 4 * CPUS, whatever is smallest */
- concurrency = min(ctx->sq_entries, 4 * num_online_cpus());
- ctx->io_wq = io_wq_create(concurrency, &data);
- if (IS_ERR(ctx->io_wq)) {
- ret = PTR_ERR(ctx->io_wq);
- ctx->io_wq = NULL;
+ ret = io_init_wq_offload(ctx, p);
+ if (ret)
goto err;
- }

return 0;
err:
@@ -6577,7 +6613,11 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)

if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL |
IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE |
- IORING_SETUP_CLAMP))
+ IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ))
+ return -EINVAL;
+
+ /* wq_fd isn't valid without ATTACH_WQ being set */
+ if (!(p.flags & IORING_SETUP_ATTACH_WQ) && p.wq_fd)
return -EINVAL;

ret = io_uring_create(entries, &p);
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 9988e82f858b..e067b92af5ad 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -75,6 +75,7 @@ enum {
#define IORING_SETUP_SQ_AFF (1U << 2) /* sq_thread_cpu is valid */
#define IORING_SETUP_CQSIZE (1U << 3) /* app defines CQ size */
#define IORING_SETUP_CLAMP (1U << 4) /* clamp SQ/CQ ring sizes */
+#define IORING_SETUP_ATTACH_WQ (1U << 5) /* attach to existing wq */

enum {
IORING_OP_NOP,
@@ -183,7 +184,8 @@ struct io_uring_params {
__u32 sq_thread_cpu;
__u32 sq_thread_idle;
__u32 features;
- __u32 resv[4];
+ __u32 wq_fd;
+ __u32 resv[3];
struct io_sqring_offsets sq_off;
struct io_cqring_offsets cq_off;
};
--
2.24.0