[PATCH] io_uring/register: add IORING_REGISTER_CLONE_FILES opcode

From: harshal24-chavan

Date: Wed Jun 17 2026 - 01:42:47 EST


Currently, if an application wants to duplicate registered file descriptors
from one io_uring instance to another, it must manually unregister and
re-register them, incurring unnecessary overhead.

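Add IORING_REGISTER_CLONE_FILES to allow direct cloning of the registered
file table from a source ring to a destination ring. A sub-range can be
cloned by giving source and destination offsets and a count (src_off,
dst_off and nr in struct io_uring_clone_files), and the
IORING_REGISTER_DST_REPLACE flag is supported.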

Signed-off-by: harshal24-chavan <harshal24.chavan@xxxxxxxxx>
---
include/uapi/linux/io_uring.h | 616 +++++++++++++++++-----------------
io_uring/register.c | 57 ++--
io_uring/rsrc.c | 259 +++++++++++---
io_uring/rsrc.h | 75 +++--
4 files changed, 600 insertions(+), 407 deletions(-)

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 909fb7aea638..eb6f35b3746e 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -30,105 +30,105 @@ extern "C" {
* IO submission data structure (Submission Queue Entry)
*/
struct io_uring_sqe {
- __u8 opcode; /* type of operation for this sqe */
- __u8 flags; /* IOSQE_ flags */
- __u16 ioprio; /* ioprio for the request */
- __s32 fd; /* file descriptor to do IO on */
+ __u8 opcode; /* type of operation for this sqe */
+ __u8 flags; /* IOSQE_ flags */
+ __u16 ioprio; /* ioprio for the request */
+ __s32 fd; /* file descriptor to do IO on */
union {
- __u64 off; /* offset into file */
- __u64 addr2;
+ __u64 off; /* offset into file */
+ __u64 addr2;
struct {
- __u32 cmd_op;
- __u32 __pad1;
+ __u32 cmd_op;
+ __u32 __pad1;
};
};
union {
- __u64 addr; /* pointer to buffer or iovecs */
- __u64 splice_off_in;
+ __u64 addr; /* pointer to buffer or iovecs */
+ __u64 splice_off_in;
struct {
- __u32 level;
- __u32 optname;
+ __u32 level;
+ __u32 optname;
};
};
- __u32 len; /* buffer size or number of iovecs */
+ __u32 len; /* buffer size or number of iovecs */
union {
- __u32 rw_flags;
- __u32 fsync_flags;
- __u16 poll_events; /* compatibility */
- __u32 poll32_events; /* word-reversed for BE */
- __u32 sync_range_flags;
- __u32 msg_flags;
- __u32 timeout_flags;
- __u32 accept_flags;
- __u32 cancel_flags;
- __u32 open_flags;
- __u32 statx_flags;
- __u32 fadvise_advice;
- __u32 splice_flags;
- __u32 rename_flags;
- __u32 unlink_flags;
- __u32 hardlink_flags;
- __u32 xattr_flags;
- __u32 msg_ring_flags;
- __u32 uring_cmd_flags;
- __u32 waitid_flags;
- __u32 futex_flags;
- __u32 install_fd_flags;
- __u32 nop_flags;
- __u32 pipe_flags;
+ __u32 rw_flags;
+ __u32 fsync_flags;
+ __u16 poll_events; /* compatibility */
+ __u32 poll32_events; /* word-reversed for BE */
+ __u32 sync_range_flags;
+ __u32 msg_flags;
+ __u32 timeout_flags;
+ __u32 accept_flags;
+ __u32 cancel_flags;
+ __u32 open_flags;
+ __u32 statx_flags;
+ __u32 fadvise_advice;
+ __u32 splice_flags;
+ __u32 rename_flags;
+ __u32 unlink_flags;
+ __u32 hardlink_flags;
+ __u32 xattr_flags;
+ __u32 msg_ring_flags;
+ __u32 uring_cmd_flags;
+ __u32 waitid_flags;
+ __u32 futex_flags;
+ __u32 install_fd_flags;
+ __u32 nop_flags;
+ __u32 pipe_flags;
};
- __u64 user_data; /* data to be passed back at completion time */
+ __u64 user_data; /* data to be passed back at completion time */
/* pack this to avoid bogus arm OABI complaints */
union {
/* index into fixed buffers, if used */
- __u16 buf_index;
+ __u16 buf_index;
/* for grouped buffer selection */
- __u16 buf_group;
+ __u16 buf_group;
} __attribute__((packed));
/* personality to use, if used */
- __u16 personality;
+ __u16 personality;
union {
- __s32 splice_fd_in;
- __u32 file_index;
- __u32 zcrx_ifq_idx;
- __u32 optlen;
+ __s32 splice_fd_in;
+ __u32 file_index;
+ __u32 zcrx_ifq_idx;
+ __u32 optlen;
struct {
- __u16 addr_len;
- __u16 __pad3[1];
+ __u16 addr_len;
+ __u16 __pad3[1];
};
struct {
- __u8 write_stream;
- __u8 __pad4[3];
+ __u8 write_stream;
+ __u8 __pad4[3];
};
};
union {
struct {
- __u64 addr3;
- __u64 __pad2[1];
+ __u64 addr3;
+ __u64 __pad2[1];
};
struct {
- __u64 attr_ptr; /* pointer to attribute information */
- __u64 attr_type_mask; /* bit mask of attributes */
+ __u64 attr_ptr; /* pointer to attribute information */
+ __u64 attr_type_mask; /* bit mask of attributes */
};
- __u64 optval;
+ __u64 optval;
/*
* If the ring is initialized with IORING_SETUP_SQE128, then
* this field is used for 80 bytes of arbitrary command data
*/
- __u8 cmd[0];
+ __u8 cmd[0];
};
};

/* sqe->attr_type_mask flags */
-#define IORING_RW_ATTR_FLAG_PI (1U << 0)
+#define IORING_RW_ATTR_FLAG_PI (1U << 0)
/* PI attribute information */
struct io_uring_attr_pi {
- __u16 flags;
- __u16 app_tag;
- __u32 len;
- __u64 addr;
- __u64 seed;
- __u64 rsvd;
+ __u16 flags;
+ __u16 app_tag;
+ __u32 len;
+ __u64 addr;
+ __u64 seed;
+ __u64 rsvd;
};

/*
@@ -138,7 +138,7 @@ struct io_uring_attr_pi {
* in. The picked direct descriptor will be returned in cqe->res, or -ENFILE
* if the space is full.
*/
-#define IORING_FILE_INDEX_ALLOC (~0U)
+#define IORING_FILE_INDEX_ALLOC (~0U)

enum io_uring_sqe_flags_bit {
IOSQE_FIXED_FILE_BIT,
@@ -154,31 +154,31 @@ enum io_uring_sqe_flags_bit {
* sqe->flags
*/
/* use fixed fileset */
-#define IOSQE_FIXED_FILE (1U << IOSQE_FIXED_FILE_BIT)
+#define IOSQE_FIXED_FILE (1U << IOSQE_FIXED_FILE_BIT)
/* issue after inflight IO */
-#define IOSQE_IO_DRAIN (1U << IOSQE_IO_DRAIN_BIT)
+#define IOSQE_IO_DRAIN (1U << IOSQE_IO_DRAIN_BIT)
/* links next sqe */
-#define IOSQE_IO_LINK (1U << IOSQE_IO_LINK_BIT)
+#define IOSQE_IO_LINK (1U << IOSQE_IO_LINK_BIT)
/* like LINK, but stronger */
-#define IOSQE_IO_HARDLINK (1U << IOSQE_IO_HARDLINK_BIT)
+#define IOSQE_IO_HARDLINK (1U << IOSQE_IO_HARDLINK_BIT)
/* always go async */
-#define IOSQE_ASYNC (1U << IOSQE_ASYNC_BIT)
+#define IOSQE_ASYNC (1U << IOSQE_ASYNC_BIT)
/* select buffer from sqe->buf_group */
-#define IOSQE_BUFFER_SELECT (1U << IOSQE_BUFFER_SELECT_BIT)
+#define IOSQE_BUFFER_SELECT (1U << IOSQE_BUFFER_SELECT_BIT)
/* don't post CQE if request succeeded */
-#define IOSQE_CQE_SKIP_SUCCESS (1U << IOSQE_CQE_SKIP_SUCCESS_BIT)
+#define IOSQE_CQE_SKIP_SUCCESS (1U << IOSQE_CQE_SKIP_SUCCESS_BIT)

/*
* io_uring_setup() flags
*/
-#define IORING_SETUP_IOPOLL (1U << 0) /* io_context is polled */
-#define IORING_SETUP_SQPOLL (1U << 1) /* SQ poll thread */
-#define IORING_SETUP_SQ_AFF (1U << 2) /* sq_thread_cpu is valid */
-#define IORING_SETUP_CQSIZE (1U << 3) /* app defines CQ size */
-#define IORING_SETUP_CLAMP (1U << 4) /* clamp SQ/CQ ring sizes */
-#define IORING_SETUP_ATTACH_WQ (1U << 5) /* attach to existing wq */
-#define IORING_SETUP_R_DISABLED (1U << 6) /* start with ring disabled */
-#define IORING_SETUP_SUBMIT_ALL (1U << 7) /* continue submit on error */
+#define IORING_SETUP_IOPOLL (1U << 0) /* io_context is polled */
+#define IORING_SETUP_SQPOLL (1U << 1) /* SQ poll thread */
+#define IORING_SETUP_SQ_AFF (1U << 2) /* sq_thread_cpu is valid */
+#define IORING_SETUP_CQSIZE (1U << 3) /* app defines CQ size */
+#define IORING_SETUP_CLAMP (1U << 4) /* clamp SQ/CQ ring sizes */
+#define IORING_SETUP_ATTACH_WQ (1U << 5) /* attach to existing wq */
+#define IORING_SETUP_R_DISABLED (1U << 6) /* start with ring disabled */
+#define IORING_SETUP_SUBMIT_ALL (1U << 7) /* continue submit on error */
/*
* Cooperative task running. When requests complete, they often require
* forcing the submitter to transition to the kernel to complete. If this
@@ -186,59 +186,59 @@ enum io_uring_sqe_flags_bit {
* than force an inter-processor interrupt reschedule. This avoids interrupting
* a task running in userspace, and saves an IPI.
*/
-#define IORING_SETUP_COOP_TASKRUN (1U << 8)
+#define IORING_SETUP_COOP_TASKRUN (1U << 8)
/*
* If COOP_TASKRUN is set, get notified if task work is available for
* running and a kernel transition would be needed to run it. This sets
* IORING_SQ_TASKRUN in the sq ring flags. Not valid without COOP_TASKRUN
* or DEFER_TASKRUN.
*/
-#define IORING_SETUP_TASKRUN_FLAG (1U << 9)
-#define IORING_SETUP_SQE128 (1U << 10) /* SQEs are 128 byte */
-#define IORING_SETUP_CQE32 (1U << 11) /* CQEs are 32 byte */
+#define IORING_SETUP_TASKRUN_FLAG (1U << 9)
+#define IORING_SETUP_SQE128 (1U << 10) /* SQEs are 128 byte */
+#define IORING_SETUP_CQE32 (1U << 11) /* CQEs are 32 byte */
/*
* Only one task is allowed to submit requests
*/
-#define IORING_SETUP_SINGLE_ISSUER (1U << 12)
+#define IORING_SETUP_SINGLE_ISSUER (1U << 12)

/*
* Defer running task work to get events.
* Rather than running bits of task work whenever the task transitions
* try to do it just before it is needed.
*/
-#define IORING_SETUP_DEFER_TASKRUN (1U << 13)
+#define IORING_SETUP_DEFER_TASKRUN (1U << 13)

/*
* Application provides the memory for the rings
*/
-#define IORING_SETUP_NO_MMAP (1U << 14)
+#define IORING_SETUP_NO_MMAP (1U << 14)

/*
* Register the ring fd in itself for use with
* IORING_REGISTER_USE_REGISTERED_RING; return a registered fd index rather
* than an fd.
*/
-#define IORING_SETUP_REGISTERED_FD_ONLY (1U << 15)
+#define IORING_SETUP_REGISTERED_FD_ONLY (1U << 15)

/*
* Removes indirection through the SQ index array.
*/
-#define IORING_SETUP_NO_SQARRAY (1U << 16)
+#define IORING_SETUP_NO_SQARRAY (1U << 16)

/* Use hybrid poll in iopoll process */
-#define IORING_SETUP_HYBRID_IOPOLL (1U << 17)
+#define IORING_SETUP_HYBRID_IOPOLL (1U << 17)

/*
* Allow both 16b and 32b CQEs. If a 32b CQE is posted, it will have
* IORING_CQE_F_32 set in cqe->flags.
*/
-#define IORING_SETUP_CQE_MIXED (1U << 18)
+#define IORING_SETUP_CQE_MIXED (1U << 18)

/*
* Allow both 64b and 128b SQEs. If a 128b SQE is posted, it will have
* a 128b opcode.
*/
-#define IORING_SETUP_SQE_MIXED (1U << 19)
+#define IORING_SETUP_SQE_MIXED (1U << 19)

/*
* When set, io_uring ignores SQ head and tail and fetches SQEs to submit
@@ -250,7 +250,7 @@ enum io_uring_sqe_flags_bit {
* IORING_SETUP_SQPOLL. The user must also never change the SQ head and tail
* values and keep it set to 0. Any other value is undefined behaviour.
*/
-#define IORING_SETUP_SQ_REWIND (1U << 20)
+#define IORING_SETUP_SQ_REWIND (1U << 20)

enum io_uring_op {
IORING_OP_NOP,
@@ -331,15 +331,15 @@ enum io_uring_op {
* multishot commands. Not compatible with
* IORING_URING_CMD_FIXED, for now.
*/
-#define IORING_URING_CMD_FIXED (1U << 0)
-#define IORING_URING_CMD_MULTISHOT (1U << 1)
-#define IORING_URING_CMD_MASK (IORING_URING_CMD_FIXED | IORING_URING_CMD_MULTISHOT)
-
+#define IORING_URING_CMD_FIXED (1U << 0)
+#define IORING_URING_CMD_MULTISHOT (1U << 1)
+#define IORING_URING_CMD_MASK \
+ (IORING_URING_CMD_FIXED | IORING_URING_CMD_MULTISHOT)

/*
* sqe->fsync_flags
*/
-#define IORING_FSYNC_DATASYNC (1U << 0)
+#define IORING_FSYNC_DATASYNC (1U << 0)

/*
* sqe->timeout_flags
@@ -348,21 +348,23 @@ enum io_uring_op {
* value in nanoseconds instead of
* pointing to a timespec.
*/
-#define IORING_TIMEOUT_ABS (1U << 0)
-#define IORING_TIMEOUT_UPDATE (1U << 1)
-#define IORING_TIMEOUT_BOOTTIME (1U << 2)
-#define IORING_TIMEOUT_REALTIME (1U << 3)
-#define IORING_LINK_TIMEOUT_UPDATE (1U << 4)
-#define IORING_TIMEOUT_ETIME_SUCCESS (1U << 5)
-#define IORING_TIMEOUT_MULTISHOT (1U << 6)
-#define IORING_TIMEOUT_IMMEDIATE_ARG (1U << 7)
-#define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME)
-#define IORING_TIMEOUT_UPDATE_MASK (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE)
+#define IORING_TIMEOUT_ABS (1U << 0)
+#define IORING_TIMEOUT_UPDATE (1U << 1)
+#define IORING_TIMEOUT_BOOTTIME (1U << 2)
+#define IORING_TIMEOUT_REALTIME (1U << 3)
+#define IORING_LINK_TIMEOUT_UPDATE (1U << 4)
+#define IORING_TIMEOUT_ETIME_SUCCESS (1U << 5)
+#define IORING_TIMEOUT_MULTISHOT (1U << 6)
+#define IORING_TIMEOUT_IMMEDIATE_ARG (1U << 7)
+#define IORING_TIMEOUT_CLOCK_MASK \
+ (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME)
+#define IORING_TIMEOUT_UPDATE_MASK \
+ (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE)
/*
* sqe->splice_flags
* extends splice(2) flags
*/
-#define SPLICE_F_FD_IN_FIXED (1U << 31) /* the last bit of __u32 */
+#define SPLICE_F_FD_IN_FIXED (1U << 31) /* the last bit of __u32 */

/*
* POLL_ADD flags. Note that since sqe->poll_events is the flag space, the
@@ -377,10 +379,10 @@ enum io_uring_op {
*
* IORING_POLL_LEVEL Level triggered poll.
*/
-#define IORING_POLL_ADD_MULTI (1U << 0)
-#define IORING_POLL_UPDATE_EVENTS (1U << 1)
-#define IORING_POLL_UPDATE_USER_DATA (1U << 2)
-#define IORING_POLL_ADD_LEVEL (1U << 3)
+#define IORING_POLL_ADD_MULTI (1U << 0)
+#define IORING_POLL_UPDATE_EVENTS (1U << 1)
+#define IORING_POLL_UPDATE_USER_DATA (1U << 2)
+#define IORING_POLL_ADD_LEVEL (1U << 3)

/*
* ASYNC_CANCEL flags.
@@ -393,12 +395,12 @@ enum io_uring_op {
* IORING_ASYNC_CANCEL_USERDATA Match on user_data, default for no other key
* IORING_ASYNC_CANCEL_OP Match request based on opcode
*/
-#define IORING_ASYNC_CANCEL_ALL (1U << 0)
-#define IORING_ASYNC_CANCEL_FD (1U << 1)
-#define IORING_ASYNC_CANCEL_ANY (1U << 2)
-#define IORING_ASYNC_CANCEL_FD_FIXED (1U << 3)
-#define IORING_ASYNC_CANCEL_USERDATA (1U << 4)
-#define IORING_ASYNC_CANCEL_OP (1U << 5)
+#define IORING_ASYNC_CANCEL_ALL (1U << 0)
+#define IORING_ASYNC_CANCEL_FD (1U << 1)
+#define IORING_ASYNC_CANCEL_ANY (1U << 2)
+#define IORING_ASYNC_CANCEL_FD_FIXED (1U << 3)
+#define IORING_ASYNC_CANCEL_USERDATA (1U << 4)
+#define IORING_ASYNC_CANCEL_OP (1U << 5)

/*
* send/sendmsg and recv/recvmsg flags (sqe->ioprio)
@@ -434,12 +436,12 @@ enum io_uring_op {
* IORING_SEND_VECTORIZED If set, SEND[_ZC] will take a pointer to a io_vec
* to allow vectorized send operations.
*/
-#define IORING_RECVSEND_POLL_FIRST (1U << 0)
-#define IORING_RECV_MULTISHOT (1U << 1)
-#define IORING_RECVSEND_FIXED_BUF (1U << 2)
-#define IORING_SEND_ZC_REPORT_USAGE (1U << 3)
-#define IORING_RECVSEND_BUNDLE (1U << 4)
-#define IORING_SEND_VECTORIZED (1U << 5)
+#define IORING_RECVSEND_POLL_FIRST (1U << 0)
+#define IORING_RECV_MULTISHOT (1U << 1)
+#define IORING_RECVSEND_FIXED_BUF (1U << 2)
+#define IORING_SEND_ZC_REPORT_USAGE (1U << 3)
+#define IORING_RECVSEND_BUNDLE (1U << 4)
+#define IORING_SEND_VECTORIZED (1U << 5)

/*
* cqe.res for IORING_CQE_F_NOTIF if
@@ -448,21 +450,21 @@ enum io_uring_op {
* It should be treated as a flag, all other
* bits of cqe.res should be treated as reserved!
*/
-#define IORING_NOTIF_USAGE_ZC_COPIED (1U << 31)
+#define IORING_NOTIF_USAGE_ZC_COPIED (1U << 31)

/*
* accept flags stored in sqe->ioprio
*/
-#define IORING_ACCEPT_MULTISHOT (1U << 0)
-#define IORING_ACCEPT_DONTWAIT (1U << 1)
-#define IORING_ACCEPT_POLL_FIRST (1U << 2)
+#define IORING_ACCEPT_MULTISHOT (1U << 0)
+#define IORING_ACCEPT_DONTWAIT (1U << 1)
+#define IORING_ACCEPT_POLL_FIRST (1U << 2)

/*
* IORING_OP_MSG_RING command types, stored in sqe->addr
*/
enum io_uring_msg_ring_flags {
- IORING_MSG_DATA, /* pass sqe->len as 'res' and off as user_data */
- IORING_MSG_SEND_FD, /* send a registered fd to another ring */
+ IORING_MSG_DATA, /* pass sqe->len as 'res' and off as user_data */
+ IORING_MSG_SEND_FD, /* send a registered fd to another ring */
};

/*
@@ -471,36 +473,36 @@ enum io_uring_msg_ring_flags {
* IORING_MSG_RING_CQE_SKIP Don't post a CQE to the target ring. Not
* applicable for IORING_MSG_DATA, obviously.
*/
-#define IORING_MSG_RING_CQE_SKIP (1U << 0)
+#define IORING_MSG_RING_CQE_SKIP (1U << 0)
/* Pass through the flags from sqe->file_index to cqe->flags */
-#define IORING_MSG_RING_FLAGS_PASS (1U << 1)
+#define IORING_MSG_RING_FLAGS_PASS (1U << 1)

/*
* IORING_OP_FIXED_FD_INSTALL flags (sqe->install_fd_flags)
*
* IORING_FIXED_FD_NO_CLOEXEC Don't mark the fd as O_CLOEXEC
*/
-#define IORING_FIXED_FD_NO_CLOEXEC (1U << 0)
+#define IORING_FIXED_FD_NO_CLOEXEC (1U << 0)

/*
* IORING_OP_NOP flags (sqe->nop_flags)
*
* IORING_NOP_INJECT_RESULT Inject result from sqe->result
*/
-#define IORING_NOP_INJECT_RESULT (1U << 0)
-#define IORING_NOP_FILE (1U << 1)
-#define IORING_NOP_FIXED_FILE (1U << 2)
-#define IORING_NOP_FIXED_BUFFER (1U << 3)
-#define IORING_NOP_TW (1U << 4)
-#define IORING_NOP_CQE32 (1U << 5)
+#define IORING_NOP_INJECT_RESULT (1U << 0)
+#define IORING_NOP_FILE (1U << 1)
+#define IORING_NOP_FIXED_FILE (1U << 2)
+#define IORING_NOP_FIXED_BUFFER (1U << 3)
+#define IORING_NOP_TW (1U << 4)
+#define IORING_NOP_CQE32 (1U << 5)

/*
* IO completion data structure (Completion Queue Entry)
*/
struct io_uring_cqe {
- __u64 user_data; /* sqe->user_data value passed back */
- __s32 res; /* result code for this event */
- __u32 flags;
+ __u64 user_data; /* sqe->user_data value passed back */
+ __s32 res; /* result code for this event */
+ __u32 flags;

/*
* If the ring is initialized with IORING_SETUP_CQE32, then this field
@@ -535,25 +537,25 @@ struct io_uring_cqe {
* setup in a mixed CQE mode, where both 16b and 32b
* CQEs may be posted to the CQ ring.
*/
-#define IORING_CQE_F_BUFFER (1U << 0)
-#define IORING_CQE_F_MORE (1U << 1)
-#define IORING_CQE_F_SOCK_NONEMPTY (1U << 2)
-#define IORING_CQE_F_NOTIF (1U << 3)
-#define IORING_CQE_F_BUF_MORE (1U << 4)
-#define IORING_CQE_F_SKIP (1U << 5)
-#define IORING_CQE_F_32 (1U << 15)
+#define IORING_CQE_F_BUFFER (1U << 0)
+#define IORING_CQE_F_MORE (1U << 1)
+#define IORING_CQE_F_SOCK_NONEMPTY (1U << 2)
+#define IORING_CQE_F_NOTIF (1U << 3)
+#define IORING_CQE_F_BUF_MORE (1U << 4)
+#define IORING_CQE_F_SKIP (1U << 5)
+#define IORING_CQE_F_32 (1U << 15)

-#define IORING_CQE_BUFFER_SHIFT 16
+#define IORING_CQE_BUFFER_SHIFT 16

/*
* Magic offsets for the application to mmap the data it needs
*/
-#define IORING_OFF_SQ_RING 0ULL
-#define IORING_OFF_CQ_RING 0x8000000ULL
-#define IORING_OFF_SQES 0x10000000ULL
-#define IORING_OFF_PBUF_RING 0x80000000ULL
-#define IORING_OFF_PBUF_SHIFT 16
-#define IORING_OFF_MMAP_MASK 0xf8000000ULL
+#define IORING_OFF_SQ_RING 0ULL
+#define IORING_OFF_CQ_RING 0x8000000ULL
+#define IORING_OFF_SQES 0x10000000ULL
+#define IORING_OFF_PBUF_RING 0x80000000ULL
+#define IORING_OFF_PBUF_SHIFT 16
+#define IORING_OFF_MMAP_MASK 0xf8000000ULL

/*
* Filled with the offset for mmap(2)
@@ -573,9 +575,9 @@ struct io_sqring_offsets {
/*
* sq_ring->flags
*/
-#define IORING_SQ_NEED_WAKEUP (1U << 0) /* needs io_uring_enter wakeup */
-#define IORING_SQ_CQ_OVERFLOW (1U << 1) /* CQ ring is overflown */
-#define IORING_SQ_TASKRUN (1U << 2) /* task should enter the kernel */
+#define IORING_SQ_NEED_WAKEUP (1U << 0) /* needs io_uring_enter wakeup */
+#define IORING_SQ_CQ_OVERFLOW (1U << 1) /* CQ ring is overflown */
+#define IORING_SQ_TASKRUN (1U << 2) /* task should enter the kernel */

struct io_cqring_offsets {
__u32 head;
@@ -594,19 +596,19 @@ struct io_cqring_offsets {
*/

/* disable eventfd notifications */
-#define IORING_CQ_EVENTFD_DISABLED (1U << 0)
+#define IORING_CQ_EVENTFD_DISABLED (1U << 0)

/*
* io_uring_enter(2) flags
*/
-#define IORING_ENTER_GETEVENTS (1U << 0)
-#define IORING_ENTER_SQ_WAKEUP (1U << 1)
-#define IORING_ENTER_SQ_WAIT (1U << 2)
-#define IORING_ENTER_EXT_ARG (1U << 3)
-#define IORING_ENTER_REGISTERED_RING (1U << 4)
-#define IORING_ENTER_ABS_TIMER (1U << 5)
-#define IORING_ENTER_EXT_ARG_REG (1U << 6)
-#define IORING_ENTER_NO_IOWAIT (1U << 7)
+#define IORING_ENTER_GETEVENTS (1U << 0)
+#define IORING_ENTER_SQ_WAKEUP (1U << 1)
+#define IORING_ENTER_SQ_WAIT (1U << 2)
+#define IORING_ENTER_EXT_ARG (1U << 3)
+#define IORING_ENTER_REGISTERED_RING (1U << 4)
+#define IORING_ENTER_ABS_TIMER (1U << 5)
+#define IORING_ENTER_EXT_ARG_REG (1U << 6)
+#define IORING_ENTER_NO_IOWAIT (1U << 7)

/*
* Passed in for io_uring_setup(2). Copied back with updated info on success
@@ -627,107 +629,110 @@ struct io_uring_params {
/*
* io_uring_params->features flags
*/
-#define IORING_FEAT_SINGLE_MMAP (1U << 0)
-#define IORING_FEAT_NODROP (1U << 1)
-#define IORING_FEAT_SUBMIT_STABLE (1U << 2)
-#define IORING_FEAT_RW_CUR_POS (1U << 3)
-#define IORING_FEAT_CUR_PERSONALITY (1U << 4)
-#define IORING_FEAT_FAST_POLL (1U << 5)
-#define IORING_FEAT_POLL_32BITS (1U << 6)
-#define IORING_FEAT_SQPOLL_NONFIXED (1U << 7)
-#define IORING_FEAT_EXT_ARG (1U << 8)
-#define IORING_FEAT_NATIVE_WORKERS (1U << 9)
-#define IORING_FEAT_RSRC_TAGS (1U << 10)
-#define IORING_FEAT_CQE_SKIP (1U << 11)
-#define IORING_FEAT_LINKED_FILE (1U << 12)
-#define IORING_FEAT_REG_REG_RING (1U << 13)
-#define IORING_FEAT_RECVSEND_BUNDLE (1U << 14)
-#define IORING_FEAT_MIN_TIMEOUT (1U << 15)
-#define IORING_FEAT_RW_ATTR (1U << 16)
-#define IORING_FEAT_NO_IOWAIT (1U << 17)
+#define IORING_FEAT_SINGLE_MMAP (1U << 0)
+#define IORING_FEAT_NODROP (1U << 1)
+#define IORING_FEAT_SUBMIT_STABLE (1U << 2)
+#define IORING_FEAT_RW_CUR_POS (1U << 3)
+#define IORING_FEAT_CUR_PERSONALITY (1U << 4)
+#define IORING_FEAT_FAST_POLL (1U << 5)
+#define IORING_FEAT_POLL_32BITS (1U << 6)
+#define IORING_FEAT_SQPOLL_NONFIXED (1U << 7)
+#define IORING_FEAT_EXT_ARG (1U << 8)
+#define IORING_FEAT_NATIVE_WORKERS (1U << 9)
+#define IORING_FEAT_RSRC_TAGS (1U << 10)
+#define IORING_FEAT_CQE_SKIP (1U << 11)
+#define IORING_FEAT_LINKED_FILE (1U << 12)
+#define IORING_FEAT_REG_REG_RING (1U << 13)
+#define IORING_FEAT_RECVSEND_BUNDLE (1U << 14)
+#define IORING_FEAT_MIN_TIMEOUT (1U << 15)
+#define IORING_FEAT_RW_ATTR (1U << 16)
+#define IORING_FEAT_NO_IOWAIT (1U << 17)

/*
* io_uring_register(2) opcodes and arguments
*/
enum io_uring_register_op {
- IORING_REGISTER_BUFFERS = 0,
- IORING_UNREGISTER_BUFFERS = 1,
- IORING_REGISTER_FILES = 2,
- IORING_UNREGISTER_FILES = 3,
- IORING_REGISTER_EVENTFD = 4,
- IORING_UNREGISTER_EVENTFD = 5,
- IORING_REGISTER_FILES_UPDATE = 6,
- IORING_REGISTER_EVENTFD_ASYNC = 7,
- IORING_REGISTER_PROBE = 8,
- IORING_REGISTER_PERSONALITY = 9,
- IORING_UNREGISTER_PERSONALITY = 10,
- IORING_REGISTER_RESTRICTIONS = 11,
- IORING_REGISTER_ENABLE_RINGS = 12,
+ IORING_REGISTER_BUFFERS = 0,
+ IORING_UNREGISTER_BUFFERS = 1,
+ IORING_REGISTER_FILES = 2,
+ IORING_UNREGISTER_FILES = 3,
+ IORING_REGISTER_EVENTFD = 4,
+ IORING_UNREGISTER_EVENTFD = 5,
+ IORING_REGISTER_FILES_UPDATE = 6,
+ IORING_REGISTER_EVENTFD_ASYNC = 7,
+ IORING_REGISTER_PROBE = 8,
+ IORING_REGISTER_PERSONALITY = 9,
+ IORING_UNREGISTER_PERSONALITY = 10,
+ IORING_REGISTER_RESTRICTIONS = 11,
+ IORING_REGISTER_ENABLE_RINGS = 12,

/* extended with tagging */
- IORING_REGISTER_FILES2 = 13,
- IORING_REGISTER_FILES_UPDATE2 = 14,
- IORING_REGISTER_BUFFERS2 = 15,
- IORING_REGISTER_BUFFERS_UPDATE = 16,
+ IORING_REGISTER_FILES2 = 13,
+ IORING_REGISTER_FILES_UPDATE2 = 14,
+ IORING_REGISTER_BUFFERS2 = 15,
+ IORING_REGISTER_BUFFERS_UPDATE = 16,

/* set/clear io-wq thread affinities */
- IORING_REGISTER_IOWQ_AFF = 17,
- IORING_UNREGISTER_IOWQ_AFF = 18,
+ IORING_REGISTER_IOWQ_AFF = 17,
+ IORING_UNREGISTER_IOWQ_AFF = 18,

/* set/get max number of io-wq workers */
- IORING_REGISTER_IOWQ_MAX_WORKERS = 19,
+ IORING_REGISTER_IOWQ_MAX_WORKERS = 19,

/* register/unregister io_uring fd with the ring */
- IORING_REGISTER_RING_FDS = 20,
- IORING_UNREGISTER_RING_FDS = 21,
+ IORING_REGISTER_RING_FDS = 20,
+ IORING_UNREGISTER_RING_FDS = 21,

/* register ring based provide buffer group */
- IORING_REGISTER_PBUF_RING = 22,
- IORING_UNREGISTER_PBUF_RING = 23,
+ IORING_REGISTER_PBUF_RING = 22,
+ IORING_UNREGISTER_PBUF_RING = 23,

/* sync cancelation API */
- IORING_REGISTER_SYNC_CANCEL = 24,
+ IORING_REGISTER_SYNC_CANCEL = 24,

/* register a range of fixed file slots for automatic slot allocation */
- IORING_REGISTER_FILE_ALLOC_RANGE = 25,
+ IORING_REGISTER_FILE_ALLOC_RANGE = 25,

/* return status information for a buffer group */
- IORING_REGISTER_PBUF_STATUS = 26,
+ IORING_REGISTER_PBUF_STATUS = 26,

/* set/clear busy poll settings */
- IORING_REGISTER_NAPI = 27,
- IORING_UNREGISTER_NAPI = 28,
+ IORING_REGISTER_NAPI = 27,
+ IORING_UNREGISTER_NAPI = 28,

- IORING_REGISTER_CLOCK = 29,
+ IORING_REGISTER_CLOCK = 29,

/* clone registered buffers from source ring to current ring */
- IORING_REGISTER_CLONE_BUFFERS = 30,
+ IORING_REGISTER_CLONE_BUFFERS = 30,

/* send MSG_RING without having a ring */
- IORING_REGISTER_SEND_MSG_RING = 31,
+ IORING_REGISTER_SEND_MSG_RING = 31,

/* register a netdev hw rx queue for zerocopy */
- IORING_REGISTER_ZCRX_IFQ = 32,
+ IORING_REGISTER_ZCRX_IFQ = 32,

/* resize CQ ring */
- IORING_REGISTER_RESIZE_RINGS = 33,
+ IORING_REGISTER_RESIZE_RINGS = 33,

- IORING_REGISTER_MEM_REGION = 34,
+ IORING_REGISTER_MEM_REGION = 34,

/* query various aspects of io_uring, see linux/io_uring/query.h */
- IORING_REGISTER_QUERY = 35,
+ IORING_REGISTER_QUERY = 35,

/* auxiliary zcrx configuration, see enum zcrx_ctrl_op */
- IORING_REGISTER_ZCRX_CTRL = 36,
+ IORING_REGISTER_ZCRX_CTRL = 36,

/* register bpf filtering programs */
- IORING_REGISTER_BPF_FILTER = 37,
+ IORING_REGISTER_BPF_FILTER = 37,
+
+ /* clone registered file descriptors from another ring */
+ IORING_REGISTER_CLONE_FILES = 38,

/* this goes last */
IORING_REGISTER_LAST,

/* flag added to the opcode to use a registered ring fd */
- IORING_REGISTER_USE_REGISTERED_RING = 1U << 31
+ IORING_REGISTER_USE_REGISTERED_RING = 1U << 31
};

/* io-wq worker categories */
@@ -745,7 +750,7 @@ struct io_uring_files_update {

enum {
/* initialise with user provided memory pointed by user_addr */
- IORING_MEM_REGION_TYPE_USER = 1,
+ IORING_MEM_REGION_TYPE_USER = 1,
};

struct io_uring_region_desc {
@@ -759,7 +764,7 @@ struct io_uring_region_desc {

enum {
/* expose the region as registered wait arguments */
- IORING_MEM_REGION_REG_WAIT_ARG = 1,
+ IORING_MEM_REGION_REG_WAIT_ARG = 1,
};

struct io_uring_mem_region_reg {
@@ -772,7 +777,7 @@ struct io_uring_mem_region_reg {
* Register a fully sparse file space, rather than pass in an array of all
* -1 file descriptors.
*/
-#define IORING_RSRC_REGISTER_SPARSE (1U << 0)
+#define IORING_RSRC_REGISTER_SPARSE (1U << 0)

struct io_uring_rsrc_register {
__u32 nr;
@@ -798,20 +803,20 @@ struct io_uring_rsrc_update2 {
};

/* Skip updating fd indexes set to this value in the fd table */
-#define IORING_REGISTER_FILES_SKIP (-2)
+#define IORING_REGISTER_FILES_SKIP (-2)

-#define IO_URING_OP_SUPPORTED (1U << 0)
+#define IO_URING_OP_SUPPORTED (1U << 0)

struct io_uring_probe_op {
__u8 op;
__u8 resv;
- __u16 flags; /* IO_URING_OP_* flags */
+ __u16 flags; /* IO_URING_OP_* flags */
__u32 resv2;
};

struct io_uring_probe {
- __u8 last_op; /* last opcode supported */
- __u8 ops_len; /* length of ops[] array below */
+ __u8 last_op; /* last opcode supported */
+ __u8 ops_len; /* length of ops[] array below */
__u16 resv;
__u32 resv2[3];
struct io_uring_probe_op ops[];
@@ -821,8 +826,8 @@ struct io_uring_restriction {
__u16 opcode;
union {
__u8 register_op; /* IORING_RESTRICTION_REGISTER_OP */
- __u8 sqe_op; /* IORING_RESTRICTION_SQE_OP */
- __u8 sqe_flags; /* IORING_RESTRICTION_SQE_FLAGS_* */
+ __u8 sqe_op; /* IORING_RESTRICTION_SQE_OP */
+ __u8 sqe_flags; /* IORING_RESTRICTION_SQE_FLAGS_* */
};
__u8 resv;
__u32 resv2[3];
@@ -836,29 +841,38 @@ struct io_uring_task_restriction {
};

struct io_uring_clock_register {
- __u32 clockid;
- __u32 __resv[3];
+ __u32 clockid;
+ __u32 __resv[3];
};

enum {
- IORING_REGISTER_SRC_REGISTERED = (1U << 0),
- IORING_REGISTER_DST_REPLACE = (1U << 1),
+ IORING_REGISTER_SRC_REGISTERED = (1U << 0),
+ IORING_REGISTER_DST_REPLACE = (1U << 1),
};

struct io_uring_clone_buffers {
- __u32 src_fd;
- __u32 flags;
- __u32 src_off;
- __u32 dst_off;
- __u32 nr;
- __u32 pad[3];
+ __u32 src_fd;
+ __u32 flags;
+ __u32 src_off;
+ __u32 dst_off;
+ __u32 nr;
+ __u32 pad[3];
+};
+
+struct io_uring_clone_files {
+ __u32 src_fd;
+ __u32 flags;
+ __u32 src_off;
+ __u32 dst_off;
+ __u32 nr;
+ __u32 pad[3];
};

struct io_uring_buf {
- __u64 addr;
- __u32 len;
- __u16 bid;
- __u16 resv;
+ __u64 addr;
+ __u32 len;
+ __u16 bid;
+ __u16 resv;
};

struct io_uring_buf_ring {
@@ -868,10 +882,10 @@ struct io_uring_buf_ring {
* ring tail is overlaid with the io_uring_buf->resv field.
*/
struct {
- __u64 resv1;
- __u32 resv2;
- __u16 resv3;
- __u16 tail;
+ __u64 resv1;
+ __u32 resv2;
+ __u16 resv3;
+ __u16 tail;
};
__DECLARE_FLEX_ARRAY(struct io_uring_buf, bufs);
};
@@ -895,25 +909,25 @@ struct io_uring_buf_ring {
* track of where the current read/recv index is at.
*/
enum io_uring_register_pbuf_ring_flags {
- IOU_PBUF_RING_MMAP = 1,
- IOU_PBUF_RING_INC = 2,
+ IOU_PBUF_RING_MMAP = 1,
+ IOU_PBUF_RING_INC = 2,
};

/* argument for IORING_(UN)REGISTER_PBUF_RING */
struct io_uring_buf_reg {
- __u64 ring_addr;
- __u32 ring_entries;
- __u16 bgid;
- __u16 flags;
- __u32 min_left;
- __u32 resv[5];
+ __u64 ring_addr;
+ __u32 ring_entries;
+ __u16 bgid;
+ __u16 flags;
+ __u32 min_left;
+ __u32 resv[5];
};

/* argument for IORING_REGISTER_PBUF_STATUS */
struct io_uring_buf_status {
- __u32 buf_group; /* input */
- __u32 head; /* output */
- __u32 resv[8];
+ __u32 buf_group; /* input */
+ __u32 head; /* output */
+ __u32 resv[8];
};

enum io_uring_napi_op {
@@ -934,12 +948,12 @@ enum io_uring_napi_tracking_strategy {

/* argument for IORING_(UN)REGISTER_NAPI */
struct io_uring_napi {
- __u32 busy_poll_to;
- __u8 prefer_busy_poll;
+ __u32 busy_poll_to;
+ __u8 prefer_busy_poll;

/* a io_uring_napi_op value */
- __u8 opcode;
- __u8 pad[2];
+ __u8 opcode;
+ __u8 pad[2];

/*
* for IO_URING_NAPI_REGISTER_OP, it is a
@@ -948,8 +962,8 @@ struct io_uring_napi {
* for IO_URING_NAPI_STATIC_ADD_ID/IO_URING_NAPI_STATIC_DEL_ID
* it is the napi id to add/del from napi_list.
*/
- __u32 op_param;
- __u32 resv;
+ __u32 op_param;
+ __u32 resv;
};

/*
@@ -957,22 +971,22 @@ struct io_uring_napi {
*/
enum io_uring_register_restriction_op {
/* Allow an io_uring_register(2) opcode */
- IORING_RESTRICTION_REGISTER_OP = 0,
+ IORING_RESTRICTION_REGISTER_OP = 0,

/* Allow an sqe opcode */
- IORING_RESTRICTION_SQE_OP = 1,
+ IORING_RESTRICTION_SQE_OP = 1,

/* Allow sqe flags */
- IORING_RESTRICTION_SQE_FLAGS_ALLOWED = 2,
+ IORING_RESTRICTION_SQE_FLAGS_ALLOWED = 2,

/* Require sqe flags (these flags must be set on each submission) */
- IORING_RESTRICTION_SQE_FLAGS_REQUIRED = 3,
+ IORING_RESTRICTION_SQE_FLAGS_REQUIRED = 3,

IORING_RESTRICTION_LAST
};

enum {
- IORING_REG_WAIT_TS = (1U << 0),
+ IORING_REG_WAIT_TS = (1U << 0),
};

/*
@@ -982,36 +996,36 @@ enum {
* the below structure.
*/
struct io_uring_reg_wait {
- struct __kernel_timespec ts;
- __u32 min_wait_usec;
- __u32 flags;
- __u64 sigmask;
- __u32 sigmask_sz;
- __u32 pad[3];
- __u64 pad2[2];
+ struct __kernel_timespec ts;
+ __u32 min_wait_usec;
+ __u32 flags;
+ __u64 sigmask;
+ __u32 sigmask_sz;
+ __u32 pad[3];
+ __u64 pad2[2];
};

/*
* Argument for io_uring_enter(2) with IORING_GETEVENTS | IORING_ENTER_EXT_ARG
*/
struct io_uring_getevents_arg {
- __u64 sigmask;
- __u32 sigmask_sz;
- __u32 min_wait_usec;
- __u64 ts;
+ __u64 sigmask;
+ __u32 sigmask_sz;
+ __u32 min_wait_usec;
+ __u64 ts;
};

/*
* Argument for IORING_REGISTER_SYNC_CANCEL
*/
struct io_uring_sync_cancel_reg {
- __u64 addr;
- __s32 fd;
- __u32 flags;
- struct __kernel_timespec timeout;
- __u8 opcode;
- __u8 pad[7];
- __u64 pad2[3];
+ __u64 addr;
+ __s32 fd;
+ __u32 flags;
+ struct __kernel_timespec timeout;
+ __u8 opcode;
+ __u8 pad[7];
+ __u64 pad2[3];
};

/*
@@ -1019,9 +1033,9 @@ struct io_uring_sync_cancel_reg {
* The range is specified as [off, off + len)
*/
struct io_uring_file_index_range {
- __u32 off;
- __u32 len;
- __u64 resv;
+ __u32 off;
+ __u32 len;
+ __u64 resv;
};

struct io_uring_recvmsg_out {
@@ -1035,7 +1049,7 @@ struct io_uring_recvmsg_out {
* Argument for IORING_OP_URING_CMD when file is a socket
*/
enum io_uring_socket_op {
- SOCKET_URING_OP_SIOCINQ = 0,
+ SOCKET_URING_OP_SIOCINQ = 0,
SOCKET_URING_OP_SIOCOUTQ,
SOCKET_URING_OP_GETSOCKOPT,
SOCKET_URING_OP_SETSOCKOPT,
@@ -1047,15 +1061,15 @@ enum io_uring_socket_op {
* SOCKET_URING_OP_TX_TIMESTAMP definitions
*/

-#define IORING_TIMESTAMP_HW_SHIFT 16
+#define IORING_TIMESTAMP_HW_SHIFT 16
/* The cqe->flags bit from which the timestamp type is stored */
-#define IORING_TIMESTAMP_TYPE_SHIFT (IORING_TIMESTAMP_HW_SHIFT + 1)
+#define IORING_TIMESTAMP_TYPE_SHIFT (IORING_TIMESTAMP_HW_SHIFT + 1)
/* The cqe->flags flag signifying whether it's a hardware timestamp */
-#define IORING_CQE_F_TSTAMP_HW ((__u32)1 << IORING_TIMESTAMP_HW_SHIFT)
+#define IORING_CQE_F_TSTAMP_HW ((__u32)1 << IORING_TIMESTAMP_HW_SHIFT)

struct io_timespec {
- __u64 tv_sec;
- __u64 tv_nsec;
+ __u64 tv_sec;
+ __u64 tv_nsec;
};

#ifdef __cplusplus
diff --git a/io_uring/register.c b/io_uring/register.c
index dce5e2f9cf77..6a6b7f6a169e 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -35,8 +35,8 @@
#include "query.h"
#include "bpf_filter.h"

-#define IORING_MAX_RESTRICTIONS (IORING_RESTRICTION_LAST + \
- IORING_REGISTER_LAST + IORING_OP_LAST)
+#define IORING_MAX_RESTRICTIONS \
+ (IORING_RESTRICTION_LAST + IORING_REGISTER_LAST + IORING_OP_LAST)

static __cold int io_probe(struct io_ring_ctx *ctx, void __user *arg,
unsigned nr_args)
@@ -86,7 +86,6 @@ int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id)
return -EINVAL;
}

-
static int io_register_personality(struct io_ring_ctx *ctx)
{
const struct cred *creds;
@@ -96,7 +95,8 @@ static int io_register_personality(struct io_ring_ctx *ctx)
creds = get_current_cred();

ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)creds,
- XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL);
+ XA_LIMIT(0, USHRT_MAX), &ctx->pers_next,
+ GFP_KERNEL);
if (ret < 0) {
put_cred(creds);
return ret;
@@ -133,7 +133,8 @@ static __cold int io_parse_restrictions(void __user *arg, unsigned int nr_args,
case IORING_RESTRICTION_REGISTER_OP:
if (res[i].register_op >= IORING_REGISTER_LAST)
goto err;
- __set_bit(res[i].register_op, restrictions->register_op);
+ __set_bit(res[i].register_op,
+ restrictions->register_op);
restrictions->reg_registered = true;
break;
case IORING_RESTRICTION_SQE_OP:
@@ -165,7 +166,8 @@ static __cold int io_parse_restrictions(void __user *arg, unsigned int nr_args,
}

static __cold int io_register_restrictions(struct io_ring_ctx *ctx,
- void __user *arg, unsigned int nr_args)
+ void __user *arg,
+ unsigned int nr_args)
{
int ret;

@@ -484,22 +486,23 @@ static void io_register_free_rings(struct io_ring_ctx *ctx,
io_free_region(ctx->user, &r->ring_region);
}

-#define swap_old(ctx, o, n, field) \
- do { \
- (o).field = (ctx)->field; \
- (ctx)->field = (n).field; \
+#define swap_old(ctx, o, n, field) \
+ do { \
+ (o).field = (ctx)->field; \
+ (ctx)->field = (n).field; \
} while (0)

-#define RESIZE_FLAGS (IORING_SETUP_CQSIZE | IORING_SETUP_CLAMP)
-#define COPY_FLAGS (IORING_SETUP_NO_SQARRAY | IORING_SETUP_SQE128 | \
- IORING_SETUP_CQE32 | IORING_SETUP_NO_MMAP | \
- IORING_SETUP_CQE_MIXED | IORING_SETUP_SQE_MIXED)
+#define RESIZE_FLAGS (IORING_SETUP_CQSIZE | IORING_SETUP_CLAMP)
+#define COPY_FLAGS \
+ (IORING_SETUP_NO_SQARRAY | IORING_SETUP_SQE128 | IORING_SETUP_CQE32 | \
+ IORING_SETUP_NO_MMAP | IORING_SETUP_CQE_MIXED | \
+ IORING_SETUP_SQE_MIXED)

static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
{
struct io_ctx_config config;
struct io_uring_region_desc rd;
- struct io_ring_ctx_rings o = { }, n = { }, *to_free = NULL;
+ struct io_ring_ctx_rings o = {}, n = {}, *to_free = NULL;
unsigned i, tail, old_head;
struct io_uring_params *p = &config.p;
struct io_rings_layout *rl = &config.layout;
@@ -612,7 +615,8 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
src_mask = (ctx->sq_entries << 1) - 1;
dst_mask = (p->sq_entries << 1) - 1;
}
- memcpy(&n.sq_sqes[index & dst_mask], &o.sq_sqes[index & src_mask], sq_size);
+ memcpy(&n.sq_sqes[index & dst_mask],
+ &o.sq_sqes[index & src_mask], sq_size);
}
WRITE_ONCE(n.rings->sq.head, old_head);
WRITE_ONCE(n.rings->sq.tail, tail);
@@ -642,7 +646,8 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
src_mask = (ctx->cq_entries << 1) - 1;
dst_mask = (p->cq_entries << 1) - 1;
}
- memcpy(&n.rings->cqes[index & dst_mask], &o.rings->cqes[index & src_mask], cq_size);
+ memcpy(&n.rings->cqes[index & dst_mask],
+ &o.rings->cqes[index & src_mask], cq_size);
}
WRITE_ONCE(n.rings->cq.head, old_head);
WRITE_ONCE(n.rings->cq.tail, tail);
@@ -666,7 +671,8 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
* should act on unconditionally. Worst case it'll be an extra
* syscall.
*/
- atomic_or(IORING_SQ_TASKRUN | IORING_SQ_NEED_WAKEUP, &n.rings->sq_flags);
+ atomic_or(IORING_SQ_TASKRUN | IORING_SQ_NEED_WAKEUP,
+ &n.rings->sq_flags);
ctx->rings = n.rings;
rcu_assign_pointer(ctx->rings_rcu, n.rings);

@@ -738,8 +744,7 @@ static int io_register_mem_region(struct io_ring_ctx *ctx, void __user *uarg)

static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
void __user *arg, unsigned nr_args)
- __releases(ctx->uring_lock)
- __acquires(ctx->uring_lock)
+ __releases(ctx->uring_lock) __acquires(ctx->uring_lock)
{
int ret;

@@ -753,7 +758,8 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
if (ctx->submitter_task && ctx->submitter_task != current)
return -EEXIST;

- if ((ctx->int_flags & IO_RING_F_REG_RESTRICTED) && !(ctx->flags & IORING_SETUP_R_DISABLED)) {
+ if ((ctx->int_flags & IO_RING_F_REG_RESTRICTED) &&
+ !(ctx->flags & IORING_SETUP_R_DISABLED)) {
opcode = array_index_nospec(opcode, IORING_REGISTER_LAST);
if (!test_bit(opcode, ctx->restrictions.register_op))
return -EACCES;
@@ -924,6 +930,12 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
break;
ret = io_register_clone_buffers(ctx, arg);
break;
+ case IORING_REGISTER_CLONE_FILES:
+ ret = -EINVAL;
+ if (!arg || nr_args != 1)
+ break;
+ ret = io_register_clone_files(ctx, arg);
+ break;
case IORING_REGISTER_ZCRX_IFQ:
ret = -EINVAL;
if (!arg || nr_args != 1)
@@ -966,7 +978,8 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
return ret;
}

-static int io_uring_register_send_msg_ring(void __user *arg, unsigned int nr_args)
+static int io_uring_register_send_msg_ring(void __user *arg,
+ unsigned int nr_args)
{
struct io_uring_sqe sqe;

diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 650303626be6..b7afb2a05f4a 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -21,20 +21,21 @@
#include "register.h"

struct io_rsrc_update {
- struct file *file;
- u64 arg;
- u32 nr_args;
- u32 offset;
+ struct file *file;
+ u64 arg;
+ u32 nr_args;
+ u32 offset;
};

static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
- struct iovec *iov, struct page **last_hpage);
+ struct iovec *iov,
+ struct page **last_hpage);

/* only define max */
-#define IORING_MAX_FIXED_FILES (1U << 20)
-#define IORING_MAX_REG_BUFFERS (1U << 14)
+#define IORING_MAX_FIXED_FILES (1U << 20)
+#define IORING_MAX_REG_BUFFERS (1U << 14)

-#define IO_CACHED_BVECS_SEGS 32
+#define IO_CACHED_BVECS_SEGS 32

int __io_account_mem(struct user_struct *user, unsigned long nr_pages)
{
@@ -51,8 +52,8 @@ int __io_account_mem(struct user_struct *user, unsigned long nr_pages)
new_pages = cur_pages + nr_pages;
if (new_pages > page_limit)
return -ENOMEM;
- } while (!atomic_long_try_cmpxchg(&user->locked_vm,
- &cur_pages, new_pages));
+ } while (!atomic_long_try_cmpxchg(&user->locked_vm, &cur_pages,
+ new_pages));
return 0;
}

@@ -485,8 +486,8 @@ int io_files_update(struct io_kiocb *req, unsigned int issue_flags)
ret = io_files_update_with_index_alloc(req, issue_flags);
} else {
io_ring_submit_lock(ctx, issue_flags);
- ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE,
- &up2, up->nr_args);
+ ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE, &up2,
+ up->nr_args);
io_ring_submit_unlock(ctx, issue_flags);
}

@@ -529,7 +530,7 @@ int io_sqe_files_unregister(struct io_ring_ctx *ctx)
int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
unsigned nr_args, u64 __user *tags)
{
- __s32 __user *fds = (__s32 __user *) arg;
+ __s32 __user *fds = (__s32 __user *)arg;
struct file *file;
int fd, ret;
unsigned i;
@@ -678,7 +679,7 @@ static int io_buffer_account_pin(struct io_ring_ctx *ctx, struct page **pages,
}

static bool io_coalesce_buffer(struct page ***pages, int *nr_pages,
- struct io_imu_folio_data *data)
+ struct io_imu_folio_data *data)
{
struct page **page_array = *pages, **new_array = NULL;
unsigned nr_pages_left = *nr_pages;
@@ -732,14 +733,14 @@ bool io_check_coalesce_buffer(struct page **page_array, int nr_pages,
*/
for (i = 1; i < nr_pages; i++) {
if (page_folio(page_array[i]) == folio &&
- page_array[i] == page_array[i-1] + 1) {
+ page_array[i] == page_array[i - 1] + 1) {
count++;
continue;
}

if (nr_folios == 1) {
- if (folio_page_idx(folio, page_array[i-1]) !=
- data->nr_pages_mid - 1)
+ if (folio_page_idx(folio, page_array[i - 1]) !=
+ data->nr_pages_mid - 1)
return false;

data->nr_pages_head = count;
@@ -749,7 +750,7 @@ bool io_check_coalesce_buffer(struct page **page_array, int nr_pages,

folio = page_folio(page_array[i]);
if (folio_size(folio) != (1UL << data->folio_shift) ||
- folio_page_idx(folio, page_array[i]) != 0)
+ folio_page_idx(folio, page_array[i]) != 0)
return false;

count = 1;
@@ -792,8 +793,8 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
return ERR_PTR(-ENOMEM);

ret = -ENOMEM;
- pages = io_pin_pages((unsigned long) iov->iov_base, iov->iov_len,
- &nr_pages);
+ pages = io_pin_pages((unsigned long)iov->iov_base, iov->iov_len,
+ &nr_pages);
if (IS_ERR(pages)) {
ret = PTR_ERR(pages);
pages = NULL;
@@ -803,7 +804,8 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
/* If it's huge page(s), try to coalesce them into fewer bvec entries */
if (nr_pages > 1 && io_check_coalesce_buffer(pages, nr_pages, &data)) {
if (data.nr_pages_mid != 1)
- coalesced = io_coalesce_buffer(&pages, &nr_pages, &data);
+ coalesced =
+ io_coalesce_buffer(&pages, &nr_pages, &data);
}

imu = io_alloc_imu(ctx, nr_pages);
@@ -817,7 +819,7 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,

size = iov->iov_len;
/* store original address for later verification */
- imu->ubuf = (unsigned long) iov->iov_base;
+ imu->ubuf = (unsigned long)iov->iov_base;
imu->len = iov->iov_len;
imu->folio_shift = PAGE_SHIFT;
imu->release = io_release_ubuf;
@@ -885,8 +887,9 @@ int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
u64 tag = 0;

if (arg) {
- uvec = (struct iovec __user *) arg;
- iov = iovec_from_user(uvec, 1, 1, &fast_iov, io_is_compat(ctx));
+ uvec = (struct iovec __user *)arg;
+ iov = iovec_from_user(uvec, 1, 1, &fast_iov,
+ io_is_compat(ctx));
if (IS_ERR(iov)) {
ret = PTR_ERR(iov);
break;
@@ -1050,8 +1053,7 @@ static int io_import_kbuf(int ddir, struct iov_iter *iter,
}

static int io_import_fixed(int ddir, struct iov_iter *iter,
- struct io_mapped_ubuf *imu,
- u64 buf_addr, size_t len)
+ struct io_mapped_ubuf *imu, u64 buf_addr, size_t len)
{
const struct bio_vec *bvec;
size_t folio_mask;
@@ -1095,7 +1097,8 @@ static int io_import_fixed(int ddir, struct iov_iter *iter,
bvec += seg_skip;
offset &= folio_mask;
}
- nr_segs = (offset + len + bvec->bv_offset + folio_mask) >> imu->folio_shift;
+ nr_segs = (offset + len + bvec->bv_offset + folio_mask) >>
+ imu->folio_shift;
iov_iter_bvec(iter, ddir, bvec, nr_segs, len);
iter->iov_offset = offset;
return 0;
@@ -1124,9 +1127,8 @@ inline struct io_rsrc_node *io_find_buf_node(struct io_kiocb *req,
return NULL;
}

-int io_import_reg_buf(struct io_kiocb *req, struct iov_iter *iter,
- u64 buf_addr, size_t len, int ddir,
- unsigned issue_flags)
+int io_import_reg_buf(struct io_kiocb *req, struct iov_iter *iter, u64 buf_addr,
+ size_t len, int ddir, unsigned int issue_flags)
{
struct io_rsrc_node *node;

@@ -1146,7 +1148,8 @@ static void lock_two_rings(struct io_ring_ctx *ctx1, struct io_ring_ctx *ctx2)
}

/* Both rings are locked by the caller. */
-static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx,
+static int io_clone_buffers(struct io_ring_ctx *ctx,
+ struct io_ring_ctx *src_ctx,
struct io_uring_clone_buffers *arg)
{
struct io_rsrc_data data;
@@ -1160,7 +1163,8 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
* Accounting state is shared between the two rings; that only works if
* both rings are accounted towards the same counters.
*/
- if (ctx->user != src_ctx->user || ctx->mm_account != src_ctx->mm_account)
+ if (ctx->user != src_ctx->user ||
+ ctx->mm_account != src_ctx->mm_account)
return -EINVAL;

/* if offsets are given, must have nr specified too */
@@ -1268,7 +1272,8 @@ int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg)

if (copy_from_user(&buf, arg, sizeof(buf)))
return -EFAULT;
- if (buf.flags & ~(IORING_REGISTER_SRC_REGISTERED|IORING_REGISTER_DST_REPLACE))
+ if (buf.flags &
+ ~(IORING_REGISTER_SRC_REGISTERED | IORING_REGISTER_DST_REPLACE))
return -EINVAL;
if (!(buf.flags & IORING_REGISTER_DST_REPLACE) && ctx->buf_table.nr)
return -EBUSY;
@@ -1303,6 +1308,165 @@ int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg)
return ret;
}

+static int io_clone_files(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx,
+ struct io_uring_clone_files *arg)
+{
+ struct io_file_table new_file_table;
+ int i, off, nr;
+ unsigned int src_nr;
+
+ lockdep_assert_held(&ctx->uring_lock);
+ lockdep_assert_held(&src_ctx->uring_lock);
+
+ /* if offsets are given, must have nr specified too */
+ if (!arg->nr && (arg->dst_off || arg->src_off))
+ return -EINVAL;
+ /* not allowed unless REPLACE is set */
+ if (ctx->file_table.data.nr &&
+ !(arg->flags & IORING_REGISTER_DST_REPLACE))
+ return -EBUSY;
+
+ src_nr = src_ctx->file_table.data.nr;
+ if (!src_nr)
+ return -ENXIO;
+ if (!arg->nr)
+ arg->nr = src_nr;
+ else if (arg->nr > src_nr)
+ return -EINVAL;
+ else if (arg->nr > IORING_MAX_FIXED_FILES)
+ return -EINVAL;
+ if (check_add_overflow(arg->nr, arg->src_off, &off) || off > src_nr)
+ return -EOVERFLOW;
+ if (check_add_overflow(arg->nr, arg->dst_off, &src_nr))
+ return -EOVERFLOW;
+ if (src_nr > IORING_MAX_FIXED_FILES)
+ return -EINVAL;
+ /* Allocate file tables memory {data + bitmap} into new_file_table */
+ memset(&new_file_table, 0, sizeof(new_file_table));
+ if (!io_alloc_file_tables(ctx, &new_file_table,
+ max(src_nr, ctx->file_table.data.nr)))
+ return -ENOMEM;
+
+ /* Copy original dst nodes from before the cloned range */
+ for (i = 0; i < min(arg->dst_off, ctx->file_table.data.nr); i++) {
+ struct io_rsrc_node *node = ctx->file_table.data.nodes[i];
+
+ if (node) {
+ new_file_table.data.nodes[i] = node;
+ node->refs++;
+ io_file_bitmap_set(&new_file_table, i);
+ }
+ }
+
+ off = arg->dst_off;
+ i = arg->src_off;
+ nr = arg->nr;
+ while (nr--) {
+ struct io_rsrc_node *dst_node, *src_node;
+
+ src_node = io_rsrc_node_lookup(&src_ctx->file_table.data, i);
+ if (!src_node) {
+ dst_node = NULL;
+ } else {
+ dst_node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE);
+ if (!dst_node) {
+ io_free_file_tables(ctx, &new_file_table);
+ return -ENOMEM;
+ }
+
+ struct file *file = io_slot_file(src_node);
+
+ get_file(file);
+ io_fixed_file_set(dst_node, file);
+ }
+ new_file_table.data.nodes[off] = dst_node;
+ if (dst_node)
+ io_file_bitmap_set(&new_file_table, off);
+
+ i++;
+ off++;
+ }
+
+ /* Copy original dst nodes from after the cloned range */
+ for (i = src_nr; i < ctx->file_table.data.nr; i++) {
+ struct io_rsrc_node *node = ctx->file_table.data.nodes[i];
+
+ if (node) {
+ new_file_table.data.nodes[i] = node;
+ node->refs++;
+ io_file_bitmap_set(&new_file_table, i);
+ }
+ }
+
+ /*
+ * If asked for replace, put the old table. new_file_table.data.nodes[]
+ * holds both old and new nodes at this point.
+ */
+ if (arg->flags & IORING_REGISTER_DST_REPLACE)
+ io_free_file_tables(ctx, &ctx->file_table);
+
+ /*
+ * ctx->file_table must be empty now - either the contents are being
+ * replaced and we just freed the table, or the contents are being
+ * copied to a ring that does not have registered files yet (checked at
+ * function entry).
+ */
+ WARN_ON_ONCE(ctx->file_table.data.nr);
+ ctx->file_table = new_file_table;
+ io_file_table_set_alloc_range(ctx, 0, ctx->file_table.data.nr);
+ return 0;
+}
+
+int io_register_clone_files(struct io_ring_ctx *ctx, void __user *arg)
+{
+ struct io_uring_clone_files clone_arg;
+ struct io_ring_ctx *src_ctx;
+ bool registered_src;
+ struct file *file;
+ int ret;
+
+ if (copy_from_user(&clone_arg, arg, sizeof(clone_arg)))
+ return -EFAULT;
+ if (clone_arg.flags &
+ ~(IORING_REGISTER_SRC_REGISTERED | IORING_REGISTER_DST_REPLACE))
+ return -EINVAL;
+ /* not allowed unless REPLACE is set */
+ if (!(clone_arg.flags & IORING_REGISTER_DST_REPLACE) &&
+ ctx->file_table.data.nr)
+ return -EBUSY;
+ if (memchr_inv(clone_arg.pad, 0, sizeof(clone_arg.pad)))
+ return -EINVAL;
+
+ registered_src = (clone_arg.flags & IORING_REGISTER_SRC_REGISTERED) !=
+ 0;
+ file = io_uring_ctx_get_file(clone_arg.src_fd, registered_src);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ src_ctx = file->private_data;
+ if (src_ctx != ctx) {
+ mutex_unlock(&ctx->uring_lock);
+ lock_two_rings(ctx, src_ctx);
+
+ /* Prevent cross-process hijacking */
+ if (src_ctx->submitter_task &&
+ src_ctx->submitter_task != current) {
+ ret = -EEXIST;
+ goto out;
+ }
+ }
+
+ ret = io_clone_files(ctx, src_ctx, &clone_arg);
+
+out:
+ if (src_ctx != ctx)
+ mutex_unlock(&src_ctx->uring_lock);
+
+ if (!registered_src)
+ fput(file);
+ return ret;
+}
+
void io_vec_free(struct iou_vec *iv)
{
if (!iv->iovec)
@@ -1328,9 +1492,8 @@ int io_vec_realloc(struct iou_vec *iv, unsigned nr_entries)
}

static int io_vec_fill_bvec(int ddir, struct iov_iter *iter,
- struct io_mapped_ubuf *imu,
- struct iovec *iovec, unsigned nr_iovs,
- struct iou_vec *vec)
+ struct io_mapped_ubuf *imu, struct iovec *iovec,
+ unsigned int nr_iovs, struct iou_vec *vec)
{
unsigned long folio_size = 1 << imu->folio_shift;
unsigned long folio_mask = folio_size - 1;
@@ -1352,7 +1515,8 @@ static int io_vec_fill_bvec(int ddir, struct iov_iter *iter,

if (unlikely(!iov_len))
return -EFAULT;
- if (unlikely(check_add_overflow(total_len, iov_len, &total_len)))
+ if (unlikely(
+ check_add_overflow(total_len, iov_len, &total_len)))
return -EOVERFLOW;

offset = buf_addr - imu->ubuf;
@@ -1366,11 +1530,11 @@ static int io_vec_fill_bvec(int ddir, struct iov_iter *iter,
offset &= folio_mask;

for (; iov_len; offset = 0, bvec_idx++, src_bvec++) {
- size_t seg_size = min_t(size_t, iov_len,
- folio_size - offset);
+ size_t seg_size =
+ min_t(size_t, iov_len, folio_size - offset);

- bvec_set_page(&res_bvec[bvec_idx],
- src_bvec->bv_page, seg_size, offset);
+ bvec_set_page(&res_bvec[bvec_idx], src_bvec->bv_page,
+ seg_size, offset);
iov_len -= seg_size;
}
}
@@ -1411,7 +1575,7 @@ static int io_vec_fill_kern_bvec(int ddir, struct iov_iter *iter,
size_t offset = (size_t)(uintptr_t)iovec[iov_idx].iov_base;
size_t iov_len = iovec[iov_idx].iov_len;
struct bvec_iter bi = {
- .bi_size = offset + iov_len,
+ .bi_size = offset + iov_len,
};
struct bio_vec bv;

@@ -1439,7 +1603,7 @@ static int iov_kern_bvec_size(const struct iovec *iov,
return ret;

for (i = 0; off < offset + iov->iov_len && i < imu->nr_bvecs;
- off += bvec[i].bv_len, i++) {
+ off += bvec[i].bv_len, i++) {
if (offset >= off && offset < off + bvec[i].bv_len)
start = i;
}
@@ -1472,9 +1636,9 @@ static int io_kern_bvec_size(struct iovec *iov, unsigned nr_iovs,
return 0;
}

-int io_import_reg_vec(int ddir, struct iov_iter *iter,
- struct io_kiocb *req, struct iou_vec *vec,
- unsigned nr_iovs, unsigned issue_flags)
+int io_import_reg_vec(int ddir, struct iov_iter *iter, struct io_kiocb *req,
+ struct iou_vec *vec, unsigned int nr_iovs,
+ unsigned int issue_flags)
{
struct io_rsrc_node *node;
struct io_mapped_ubuf *imu;
@@ -1531,7 +1695,8 @@ int io_import_reg_vec(int ddir, struct iov_iter *iter,
}

if (imu->flags & IO_REGBUF_F_KBUF)
- return io_vec_fill_kern_bvec(ddir, iter, imu, iov, nr_iovs, vec);
+ return io_vec_fill_kern_bvec(ddir, iter, imu, iov, nr_iovs,
+ vec);

return io_vec_fill_bvec(ddir, iter, imu, iov, nr_iovs, vec);
}
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index 44e3386f7c1c..670345be036f 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -5,16 +5,16 @@
#include <linux/io_uring_types.h>
#include <linux/lockdep.h>

-#define IO_VEC_CACHE_SOFT_CAP 256
+#define IO_VEC_CACHE_SOFT_CAP 256

enum {
- IORING_RSRC_FILE = 0,
- IORING_RSRC_BUFFER = 1,
+ IORING_RSRC_FILE = 0,
+ IORING_RSRC_BUFFER = 1,
};

struct io_rsrc_node {
- unsigned char type;
- int refs;
+ unsigned char type;
+ int refs;

u64 tag;
union {
@@ -24,36 +24,36 @@ struct io_rsrc_node {
};

enum {
- IO_IMU_DEST = 1 << ITER_DEST,
- IO_IMU_SOURCE = 1 << ITER_SOURCE,
+ IO_IMU_DEST = 1 << ITER_DEST,
+ IO_IMU_SOURCE = 1 << ITER_SOURCE,
};

enum {
- IO_REGBUF_F_KBUF = 1,
+ IO_REGBUF_F_KBUF = 1,
};

struct io_mapped_ubuf {
- u64 ubuf;
- unsigned int len;
- unsigned int nr_bvecs;
- unsigned int folio_shift;
- refcount_t refs;
- unsigned long acct_pages;
- void (*release)(void *);
- void *priv;
- u8 flags;
- u8 dir;
- struct bio_vec bvec[] __counted_by(nr_bvecs);
+ u64 ubuf;
+ unsigned int len;
+ unsigned int nr_bvecs;
+ unsigned int folio_shift;
+ refcount_t refs;
+ unsigned long acct_pages;
+ void (*release)(void *data);
+ void *priv;
+ u8 flags;
+ u8 dir;
+ struct bio_vec bvec[] __counted_by(nr_bvecs);
};

struct io_imu_folio_data {
/* Head folio can be partially included in the fixed buf */
- unsigned int nr_pages_head;
+ unsigned int nr_pages_head;
/* For non-head/tail folios, has to be fully included */
- unsigned int nr_pages_mid;
- unsigned int folio_shift;
- unsigned int nr_folios;
- unsigned long first_folio_page_idx;
+ unsigned int nr_pages_mid;
+ unsigned int folio_shift;
+ unsigned int nr_folios;
+ unsigned long first_folio_page_idx;
};

bool io_rsrc_cache_init(struct io_ring_ctx *ctx);
@@ -65,16 +65,16 @@ int io_rsrc_data_alloc(struct io_rsrc_data *data, unsigned nr);

struct io_rsrc_node *io_find_buf_node(struct io_kiocb *req,
unsigned issue_flags);
-int io_import_reg_buf(struct io_kiocb *req, struct iov_iter *iter,
- u64 buf_addr, size_t len, int ddir,
- unsigned issue_flags);
-int io_import_reg_vec(int ddir, struct iov_iter *iter,
- struct io_kiocb *req, struct iou_vec *vec,
- unsigned nr_iovs, unsigned issue_flags);
+int io_import_reg_buf(struct io_kiocb *req, struct iov_iter *iter, u64 buf_addr,
+ size_t len, int ddir, unsigned int issue_flags);
+int io_import_reg_vec(int ddir, struct iov_iter *iter, struct io_kiocb *req,
+ struct iou_vec *vec, unsigned int nr_iovs,
+ unsigned int issue_flags);
int io_prep_reg_iovec(struct io_kiocb *req, struct iou_vec *iv,
- const struct iovec __user *uvec, size_t uvec_segs);
+ const struct iovec __user *uvec, size_t uvec_segs);

int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg);
+int io_register_clone_files(struct io_ring_ctx *ctx, void __user *arg);
int io_sqe_buffers_unregister(struct io_ring_ctx *ctx);
int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
unsigned int nr_args, u64 __user *tags);
@@ -87,21 +87,22 @@ int io_register_files_update(struct io_ring_ctx *ctx, void __user *arg,
int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg,
unsigned size, unsigned type);
int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg,
- unsigned int size, unsigned int type);
+ unsigned int size, unsigned int type);
int io_validate_user_buf_range(u64 uaddr, u64 ulen);

bool io_check_coalesce_buffer(struct page **page_array, int nr_pages,
struct io_imu_folio_data *data);

-static inline struct io_rsrc_node *io_rsrc_node_lookup(struct io_rsrc_data *data,
- unsigned int index)
+static inline struct io_rsrc_node *
+io_rsrc_node_lookup(struct io_rsrc_data *data, unsigned int index)
{
if (index < data->nr)
return data->nodes[array_index_nospec(index, data->nr)];
return NULL;
}

-static inline void io_put_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node)
+static inline void io_put_rsrc_node(struct io_ring_ctx *ctx,
+ struct io_rsrc_node *node)
{
lockdep_assert_held(&ctx->uring_lock);
if (!--node->refs)
@@ -143,8 +144,8 @@ static inline void __io_unaccount_mem(struct user_struct *user,
void io_vec_free(struct iou_vec *iv);
int io_vec_realloc(struct iou_vec *iv, unsigned nr_entries);

-static inline void io_vec_reset_iovec(struct iou_vec *iv,
- struct iovec *iovec, unsigned nr)
+static inline void io_vec_reset_iovec(struct iou_vec *iv, struct iovec *iovec,
+ unsigned int nr)
{
io_vec_free(iv);
iv->iovec = iovec;
--
2.54.0