[PATCH 5.17 391/772] io_uring: fix assuming triggered poll waitqueue is the single poll

From: Greg Kroah-Hartman
Date: Tue Jun 07 2022 - 17:18:04 EST


From: Jens Axboe <axboe@xxxxxxxxx>

[ Upstream commit d89a4fac0fbc6fe5fc24d1c9a889440dcf410368 ]

syzbot reports a recent regression:

BUG: KASAN: use-after-free in __wake_up_common+0x637/0x650 kernel/sched/wait.c:101
Read of size 8 at addr ffff888011e8a130 by task syz-executor413/3618

CPU: 0 PID: 3618 Comm: syz-executor413 Tainted: G W 5.17.0-syzkaller-01402-g8565d64430f8 #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
<TASK>
__dump_stack lib/dump_stack.c:88 [inline]
dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
print_address_description.constprop.0.cold+0x8d/0x303 mm/kasan/report.c:255
__kasan_report mm/kasan/report.c:442 [inline]
kasan_report.cold+0x83/0xdf mm/kasan/report.c:459
__wake_up_common+0x637/0x650 kernel/sched/wait.c:101
__wake_up_common_lock+0xd0/0x130 kernel/sched/wait.c:138
tty_release+0x657/0x1200 drivers/tty/tty_io.c:1781
__fput+0x286/0x9f0 fs/file_table.c:317
task_work_run+0xdd/0x1a0 kernel/task_work.c:164
exit_task_work include/linux/task_work.h:32 [inline]
do_exit+0xaff/0x29d0 kernel/exit.c:806
do_group_exit+0xd2/0x2f0 kernel/exit.c:936
__do_sys_exit_group kernel/exit.c:947 [inline]
__se_sys_exit_group kernel/exit.c:945 [inline]
__x64_sys_exit_group+0x3a/0x50 kernel/exit.c:945
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x44/0xae
RIP: 0033:0x7f439a1fac69

which is due to leaving the request on the waitqueue mistakenly. The
reproducer is using a tty device, which means we end up arming the same
poll queue twice (it uses the same poll waitqueue for both), but in
io_poll_wake() we always just clear REQ_F_SINGLE_POLL regardless of which
entry triggered. This leaves one waitqueue potentially armed after we're
done, which then blows up in tty when the waitqueue is attempted removed.

We have no room to store this information, so simply encode it in the
wait_queue_entry->private where we store the io_kiocb request pointer.

Fixes: 91eac1c69c20 ("io_uring: cache poll/double-poll state with a request flag")
Reported-by: syzbot+09ad4050dd3a120bfccd@xxxxxxxxxxxxxxxxxxxxxxxxx
Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx>
---
fs/io_uring.c | 16 ++++++++++++----
1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 8711d92f4d71..25b0832e0fc3 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5632,10 +5632,13 @@ static void io_poll_cancel_req(struct io_kiocb *req)
io_poll_execute(req, 0);
}

+#define wqe_to_req(wait) ((void *)((unsigned long) (wait)->private & ~1))
+#define wqe_is_double(wait) ((unsigned long) (wait)->private & 1)
+
static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
void *key)
{
- struct io_kiocb *req = wait->private;
+ struct io_kiocb *req = wqe_to_req(wait);
struct io_poll_iocb *poll = container_of(wait, struct io_poll_iocb,
wait);
__poll_t mask = key_to_poll(key);
@@ -5673,7 +5676,10 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
if (mask && poll->events & EPOLLONESHOT) {
list_del_init(&poll->wait.entry);
poll->head = NULL;
- req->flags &= ~REQ_F_SINGLE_POLL;
+ if (wqe_is_double(wait))
+ req->flags &= ~REQ_F_DOUBLE_POLL;
+ else
+ req->flags &= ~REQ_F_SINGLE_POLL;
}
__io_poll_execute(req, mask);
}
@@ -5685,6 +5691,7 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
struct io_poll_iocb **poll_ptr)
{
struct io_kiocb *req = pt->req;
+ unsigned long wqe_private = (unsigned long) req;

/*
* The file being polled uses multiple waitqueues for poll handling
@@ -5710,6 +5717,8 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
pt->error = -ENOMEM;
return;
}
+ /* mark as double wq entry */
+ wqe_private |= 1;
req->flags |= REQ_F_DOUBLE_POLL;
io_init_poll_iocb(poll, first->events, first->wait.func);
*poll_ptr = poll;
@@ -5720,7 +5729,7 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
req->flags |= REQ_F_SINGLE_POLL;
pt->nr_entries++;
poll->head = head;
- poll->wait.private = req;
+ poll->wait.private = (void *) wqe_private;

if (poll->events & EPOLLEXCLUSIVE)
add_wait_queue_exclusive(head, &poll->wait);
@@ -5747,7 +5756,6 @@ static int __io_arm_poll_handler(struct io_kiocb *req,
INIT_HLIST_NODE(&req->hash_node);
io_init_poll_iocb(poll, mask, io_poll_wake);
poll->file = req->file;
- poll->wait.private = req;

ipt->pt._key = mask;
ipt->req = req;
--
2.35.1