Re: [PATCH] aio: prevent the final fput() in the middle of vfs_poll() (Re: KASAN: use-after-free Read in unix_dgram_poll)

From: Linus Torvalds
Date: Sun Mar 03 2019 - 17:35:15 EST


On Sun, Mar 3, 2019 at 12:30 PM Al Viro <viro@xxxxxxxxxxxxxxxxxx> wrote:
>
> On Sun, Mar 03, 2019 at 11:44:33AM -0800, Linus Torvalds wrote:
> >
> > I'm assuming you're talking about the second vfs_poll() in
> > aio_poll_complete_work()? The one we call before we check for
> > "rew->cancelled" properly under the spinlock?
>
> No. The first one, right in aio_poll().

Ok, they are both confusing. The lifetime of that filp is just not
clear, with the whole "it could have been completed asynchronously"
issue.

> I'll need to dig out the mail archive from last year, but IIRC this
> had been considered and there'd been non-trivial problems. Give me
> an hour or so and I'll dig that out (there'd been many rounds of
> review, with long threads involved, some private, some on fsdevel).

Looking more at the patch, it still looks eminently sane to me.I fixed
the silly "ki_filp" thing you noticed (I thought we made fput() take
NULL, but you're right we don't), and simplified the patch to not do
the changes that aren't necessary.

I fixed the silly "filp can be NULL" issue you pointed out (I lazily
thought we allowed fput(NULL), but you're right, we definitely don't),
and simplified the patch to not do the unnecessary changes where we
can just access the file pointer multiple different ways.

And the resulting kernel boots fine, but I doubt I have anything that
actually uses io_submit(), so that doesn't mean much.

Slightly updated patch attached anyway, even smaller than before:

2 files changed, 36 insertions(+), 44 deletions(-)

with several of the new lines being comments about that file pointer
placement in the union.

Linus
fs/aio.c | 72 ++++++++++++++++++++++--------------------------------
include/linux/fs.h | 8 +++++-
2 files changed, 36 insertions(+), 44 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index aaaaf4d12c73..82c08422b0f4 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -167,9 +167,13 @@ struct kioctx {
unsigned id;
};

+/*
+ * First field must be the file pointer in all the
+ * iocb unions! See also 'struct kiocb' in <linux/fs.h>
+ */
struct fsync_iocb {
- struct work_struct work;
struct file *file;
+ struct work_struct work;
bool datasync;
};

@@ -183,8 +187,15 @@ struct poll_iocb {
struct work_struct work;
};

+/*
+ * NOTE! Each of the iocb union members has the file pointer
+ * as the first entry in their struct definition. So you can
+ * access the file pointer through any of the sub-structs,
+ * or directly as just 'ki_filp' in this struct.
+ */
struct aio_kiocb {
union {
+ struct file *ki_filp;
struct kiocb rw;
struct fsync_iocb fsync;
struct poll_iocb poll;
@@ -1060,6 +1071,8 @@ static inline void iocb_put(struct aio_kiocb *iocb)
{
if (refcount_read(&iocb->ki_refcnt) == 0 ||
refcount_dec_and_test(&iocb->ki_refcnt)) {
+ if (iocb->ki_filp)
+ fput(iocb->ki_filp);
percpu_ref_put(&iocb->ki_ctx->reqs);
kmem_cache_free(kiocb_cachep, iocb);
}
@@ -1424,7 +1437,6 @@ static void aio_complete_rw(struct kiocb *kiocb, long res, long res2)
file_end_write(kiocb->ki_filp);
}

- fput(kiocb->ki_filp);
aio_complete(iocb, res, res2);
}

@@ -1432,9 +1444,6 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
{
int ret;

- req->ki_filp = fget(iocb->aio_fildes);
- if (unlikely(!req->ki_filp))
- return -EBADF;
req->ki_complete = aio_complete_rw;
req->private = NULL;
req->ki_pos = iocb->aio_offset;
@@ -1451,7 +1460,7 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
ret = ioprio_check_cap(iocb->aio_reqprio);
if (ret) {
pr_debug("aio ioprio check cap error: %d\n", ret);
- goto out_fput;
+ return ret;
}

req->ki_ioprio = iocb->aio_reqprio;
@@ -1460,14 +1469,10 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)

ret = kiocb_set_rw_flags(req, iocb->aio_rw_flags);
if (unlikely(ret))
- goto out_fput;
+ return ret;

req->ki_flags &= ~IOCB_HIPRI; /* no one is going to poll for this I/O */
return 0;
-
-out_fput:
- fput(req->ki_filp);
- return ret;
}

static int aio_setup_rw(int rw, const struct iocb *iocb, struct iovec **iovec,
@@ -1521,24 +1526,19 @@ static ssize_t aio_read(struct kiocb *req, const struct iocb *iocb,
if (ret)
return ret;
file = req->ki_filp;
-
- ret = -EBADF;
if (unlikely(!(file->f_mode & FMODE_READ)))
- goto out_fput;
+ return -EBADF;
ret = -EINVAL;
if (unlikely(!file->f_op->read_iter))
- goto out_fput;
+ return -EINVAL;

ret = aio_setup_rw(READ, iocb, &iovec, vectored, compat, &iter);
if (ret)
- goto out_fput;
+ return ret;
ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter));
if (!ret)
aio_rw_done(req, call_read_iter(file, req, &iter));
kfree(iovec);
-out_fput:
- if (unlikely(ret))
- fput(file);
return ret;
}

@@ -1555,16 +1555,14 @@ static ssize_t aio_write(struct kiocb *req, const struct iocb *iocb,
return ret;
file = req->ki_filp;

- ret = -EBADF;
if (unlikely(!(file->f_mode & FMODE_WRITE)))
- goto out_fput;
- ret = -EINVAL;
+ return -EBADF;
if (unlikely(!file->f_op->write_iter))
- goto out_fput;
+ return -EINVAL;

ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter);
if (ret)
- goto out_fput;
+ return ret;
ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter));
if (!ret) {
/*
@@ -1582,9 +1580,6 @@ static ssize_t aio_write(struct kiocb *req, const struct iocb *iocb,
aio_rw_done(req, call_write_iter(file, req, &iter));
}
kfree(iovec);
-out_fput:
- if (unlikely(ret))
- fput(file);
return ret;
}

@@ -1594,7 +1589,6 @@ static void aio_fsync_work(struct work_struct *work)
int ret;

ret = vfs_fsync(req->file, req->datasync);
- fput(req->file);
aio_complete(container_of(req, struct aio_kiocb, fsync), ret, 0);
}

@@ -1605,13 +1599,8 @@ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,
iocb->aio_rw_flags))
return -EINVAL;

- req->file = fget(iocb->aio_fildes);
- if (unlikely(!req->file))
- return -EBADF;
- if (unlikely(!req->file->f_op->fsync)) {
- fput(req->file);
+ if (unlikely(!req->file->f_op->fsync))
return -EINVAL;
- }

req->datasync = datasync;
INIT_WORK(&req->work, aio_fsync_work);
@@ -1621,10 +1610,7 @@ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,

static inline void aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask)
{
- struct file *file = iocb->poll.file;
-
aio_complete(iocb, mangle_poll(mask), 0);
- fput(file);
}

static void aio_poll_complete_work(struct work_struct *work)
@@ -1743,9 +1729,6 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)

INIT_WORK(&req->work, aio_poll_complete_work);
req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP;
- req->file = fget(iocb->aio_fildes);
- if (unlikely(!req->file))
- return -EBADF;

req->head = NULL;
req->woken = false;
@@ -1788,10 +1771,8 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
spin_unlock_irq(&ctx->ctx_lock);

out:
- if (unlikely(apt.error)) {
- fput(req->file);
+ if (unlikely(apt.error))
return apt.error;
- }

if (mask)
aio_poll_complete(aiocb, mask);
@@ -1829,6 +1810,11 @@ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb,
if (unlikely(!req))
goto out_put_reqs_available;

+ req->ki_filp = fget(iocb->aio_fildes);
+ ret = -EBADF;
+ if (unlikely(!req->ki_filp))
+ goto out_put_req;
+
if (iocb->aio_flags & IOCB_FLAG_RESFD) {
/*
* If the IOCB_FLAG_RESFD flag of aio_flags is set, get an
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 29d8e2cfed0e..fd423fec8d83 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -304,13 +304,19 @@ enum rw_hint {

struct kiocb {
struct file *ki_filp;
+
+ /* The 'ki_filp' pointer is shared in a union for aio */
+ randomized_struct_fields_start
+
loff_t ki_pos;
void (*ki_complete)(struct kiocb *iocb, long ret, long ret2);
void *private;
int ki_flags;
u16 ki_hint;
u16 ki_ioprio; /* See linux/ioprio.h */
-} __randomize_layout;
+
+ randomized_struct_fields_end
+};

static inline bool is_sync_kiocb(struct kiocb *kiocb)
{