Re: [PATCH] fs/io_uring: fix O_PATH fds in openat, openat2, statx

From: Jens Axboe
Date: Thu May 07 2020 - 16:53:34 EST


On 5/7/20 1:29 PM, Al Viro wrote:
> On Thu, May 07, 2020 at 01:05:23PM -0600, Jens Axboe wrote:
>> On 5/7/20 1:01 PM, Al Viro wrote:
>>> On Thu, May 07, 2020 at 08:57:25PM +0200, Max Kellermann wrote:
>>>> If an operation's flag `needs_file` is set, the function
>>>> io_req_set_file() calls io_file_get() to obtain a `struct file*`.
>>>>
>>>> This fails for `O_PATH` file descriptors, because those have no
>>>> `struct file*`
>>>
>>> O_PATH descriptors most certainly *do* have that. What the hell
>>> are you talking about?
>>
>> Yeah, hence I was interested in the test case. Since this is
>> bypassing that part, was assuming we'd have some logic error
>> that attempted a file grab for a case where we shouldn't.
>
> Just in case - you do realize that you should either resolve the
> descriptor yourself (and use the resulting struct file *, without
> letting anyone even look at the descriptor) *or* pass the
> descriptor as-is and don't even look at the descriptor table?
>
> Once more, with feeling:
>
> Descriptor tables are inherently sharable objects. You can't resolve
> a descriptor twice and assume you'll get the same thing both times.
> You can't insert something into descriptor table and assume that the
> same slot will be holding the same struct file reference after
> the descriptor table has been unlocked.
>
> Again, resolving the descriptor more than once in the course of a
> syscall is almost always a serious bug; there are very few exceptions
> and none of the cases mentioned in that patch are anywhere near those.
>
> IOW, that patch will either immediately break things on O_PATH
> (if you are really passing struct file *) or it's probably correct,
> but the reason is entirely different - it's that you are passing
> descriptor, which gets resolved by whatever you are calling, in
> which case io_uring has no business resolving it. And if that's
> the case, you are limited to real descriptors - your descriptor
> table lookalikes won't be of any use.

I think the patch is correct as-is; I took a good look at how we're
currently handling it. None of those three ops should fiddle with
the fd at all, and all of them forbid the use of fixed files (the
descriptor table look-alikes), so that part is fine, too.

There's some low-hanging fruit around optimizing and improving it, so
I'm including an updated version below. Max, can you double check
with your testing?


diff --git a/fs/io_uring.c b/fs/io_uring.c
index dd680eb153cb..979d9f977409 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -680,8 +680,6 @@ struct io_op_def {
unsigned needs_mm : 1;
/* needs req->file assigned */
unsigned needs_file : 1;
- /* needs req->file assigned IFF fd is >= 0 */
- unsigned fd_non_neg : 1;
/* hash wq insertion if file is a regular file */
unsigned hash_reg_file : 1;
/* unbound wq insertion if file is a non-regular file */
@@ -784,8 +782,6 @@ static const struct io_op_def io_op_defs[] = {
.needs_file = 1,
},
[IORING_OP_OPENAT] = {
- .needs_file = 1,
- .fd_non_neg = 1,
.file_table = 1,
.needs_fs = 1,
},
@@ -799,8 +795,6 @@ static const struct io_op_def io_op_defs[] = {
},
[IORING_OP_STATX] = {
.needs_mm = 1,
- .needs_file = 1,
- .fd_non_neg = 1,
.needs_fs = 1,
.file_table = 1,
},
@@ -837,8 +831,6 @@ static const struct io_op_def io_op_defs[] = {
.buffer_select = 1,
},
[IORING_OP_OPENAT2] = {
- .needs_file = 1,
- .fd_non_neg = 1,
.file_table = 1,
.needs_fs = 1,
},
@@ -5368,15 +5360,6 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
io_steal_work(req, workptr);
}

-static int io_req_needs_file(struct io_kiocb *req, int fd)
-{
- if (!io_op_defs[req->opcode].needs_file)
- return 0;
- if ((fd == -1 || fd == AT_FDCWD) && io_op_defs[req->opcode].fd_non_neg)
- return 0;
- return 1;
-}
-
static inline struct file *io_file_from_index(struct io_ring_ctx *ctx,
int index)
{
@@ -5414,14 +5397,11 @@ static int io_file_get(struct io_submit_state *state, struct io_kiocb *req,
}

static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req,
- int fd, unsigned int flags)
+ int fd)
{
bool fixed;

- if (!io_req_needs_file(req, fd))
- return 0;
-
- fixed = (flags & IOSQE_FIXED_FILE);
+ fixed = (req->flags & REQ_F_FIXED_FILE) != 0;
if (unlikely(!fixed && req->needs_fixed_file))
return -EBADF;

@@ -5798,7 +5778,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
struct io_submit_state *state, bool async)
{
unsigned int sqe_flags;
- int id, fd;
+ int id;

/*
* All io need record the previous position, if LINK vs DARIN,
@@ -5850,8 +5830,10 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
IOSQE_ASYNC | IOSQE_FIXED_FILE |
IOSQE_BUFFER_SELECT | IOSQE_IO_LINK);

- fd = READ_ONCE(sqe->fd);
- return io_req_set_file(state, req, fd, sqe_flags);
+ if (!io_op_defs[req->opcode].needs_file)
+ return 0;
+
+ return io_req_set_file(state, req, READ_ONCE(sqe->fd));
}

static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,

--
Jens Axboe