[RFC 3/4] io_uring: support futex wake requests

From: Pavel Begunkov
Date: Tue Jun 01 2021 - 10:59:16 EST


Add support for futex wake requests. As FUTEX_WAKE_OP does, such a request
also modifies the value at addr and checks against it with an encoded
operation, but it only operates on a single address, as anything more may
be problematic to squeeze into the SQE and io_kiocb.

Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx>
---
fs/io_uring.c | 48 +++++++++++++++++++++++++++++++++--
include/uapi/linux/io_uring.h | 10 +++++++-
2 files changed, 55 insertions(+), 3 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 2c6b14a3a4f6..99f4f8d9f685 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -668,6 +668,12 @@ struct io_unlink {

struct io_futex {
struct file *file;
+ unsigned int futex_op;
+
+ unsigned int nr_wake;
+ unsigned int wake_op_arg;
+ unsigned int flags;
+ void __user *uaddr;
};

struct io_completion {
@@ -5874,12 +5880,50 @@ static int io_files_update(struct io_kiocb *req, unsigned int issue_flags)

static int io_futex_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- return -EINVAL;
+ struct io_futex *f = &req->futex;
+ u64 v;
+
+ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ return -EINVAL;
+ if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
+ return -EINVAL;
+ if (sqe->len)
+ return -EINVAL;
+ f->flags = READ_ONCE(sqe->futex_flags);
+ if (f->flags & ~IORING_FUTEX_SHARED)
+ return -EINVAL;
+
+ v = READ_ONCE(sqe->off);
+ f->nr_wake = (u32)v;
+ f->wake_op_arg = (u32)(v >> 32);
+ f->futex_op = READ_ONCE(sqe->futex_op);
+ f->uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr));
+ return 0;
}

static int io_futex(struct io_kiocb *req, unsigned int issue_flags)
{
- return -EINVAL;
+ bool nonblock = issue_flags & IO_URING_F_NONBLOCK;
+ struct io_futex *f = &req->futex;
+ int ret;
+
+ switch (f->futex_op) {
+ case IORING_FUTEX_WAKE_OP:
+ ret = futex_wake_op_single(f->uaddr, f->nr_wake, f->wake_op_arg,
+ !(f->flags & IORING_FUTEX_SHARED),
+ nonblock);
+ /* retry from blocking context */
+ if (nonblock && ret == -EAGAIN)
+ return -EAGAIN;
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ if (ret < 0)
+ req_set_fail(req);
+ __io_req_complete(req, issue_flags, ret, 0);
+ return 0;
}

static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 6a1af5bb2ddf..6fa5a6e59934 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -17,7 +17,10 @@
struct io_uring_sqe {
__u8 opcode; /* type of operation for this sqe */
__u8 flags; /* IOSQE_ flags */
- __u16 ioprio; /* ioprio for the request */
+ union {
+ __u16 ioprio; /* ioprio for the request */
+ __u16 futex_op; /* futex operation */
+ } __attribute__((packed));
__s32 fd; /* file descriptor to do IO on */
union {
__u64 off; /* offset into file */
@@ -161,6 +164,11 @@ enum {
*/
#define SPLICE_F_FD_IN_FIXED (1U << 31) /* the last bit of __u32 */

+/*
+ * sqe->futex_flags
+ */
+#define IORING_FUTEX_SHARED (1U << 0)
+
/*
* POLL_ADD flags. Note that since sqe->poll_events is the flag space, the
* command flags for POLL_ADD are stored in sqe->len.
--
2.31.1