Hi,
-----Original Message-----
From: Bui Quang Minh <minhquangbui99@xxxxxxxxx>
Sent: Sunday, January 12, 2025 10:34 PM
To: linux-kernel@xxxxxxxxxxxxxxx
Cc: Bui Quang Minh <minhquangbui99@xxxxxxxxx>; Jens Axboe <axboe@xxxxxxxxx>;
Pavel Begunkov <asml.silence@xxxxxxxxx>; io-uring@xxxxxxxxxxxxxxx;
syzbot+3c750be01dab672c513d@xxxxxxxxxxxxxxxxxxxxxxxxx; lizetao <lizetao1@xxxxxxxxxx>
Subject: [PATCH] io_uring: simplify the SQPOLL thread check when cancelling requests
In io_uring_try_cancel_requests, we check whether sq_data->thread ==
current to determine if the function is called by the SQPOLL thread, so
that it can do iopoll when IORING_SETUP_SQPOLL is set. However, this
check can race with SQPOLL thread termination.
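
For illustration, the racy pattern looks roughly like this (a condensed
sketch, not the exact kernel code; the hunk against
io_uring_try_cancel_requests below removes the real check):

	/* Sketch only: ctx->sq_data->thread may be cleared concurrently
	 * by the exiting SQPOLL thread, so this comparison can race
	 * with SQPOLL thread termination. */
	if (ctx->sq_data && ctx->sq_data->thread == current) {
		/* reap iopoll completions as the SQPOLL thread would */
	}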
io_uring_try_cancel_requests is called from 2 places:
io_uring_cancel_generic and io_ring_exit_work. In
io_uring_cancel_generic, we already know whether the current task is
the SQPOLL thread. In io_ring_exit_work, should the SQPOLL thread reach
this path, we don't need to do iopoll there; we can leave that for
io_uring_cancel_generic to handle.
So, to avoid the racy check, this commit adds a boolean flag to
io_uring_try_cancel_requests that indicates whether iopoll needs to be
done inside the function, and sets this flag only in
io_uring_cancel_generic when the current task is the SQPOLL thread.
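
Schematically, the resulting contract looks like this (condensed from
the hunks below; only the new signature and the call-site policy are
shown):

	static bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
						 struct io_uring_task *tctx,
						 bool cancel_all,
						 bool force_iopoll);

	/* io_uring_cancel_generic passes force_iopoll == true only on
	 * its sqd path, where current is known to be the SQPOLL thread;
	 * io_ring_exit_work always passes false. */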
Reported-by: syzbot+3c750be01dab672c513d@xxxxxxxxxxxxxxxxxxxxxxxxx
Reported-by: Li Zetao <lizetao1@xxxxxxxxxx>
Signed-off-by: Bui Quang Minh <minhquangbui99@xxxxxxxxx>
---
io_uring/io_uring.c | 21 +++++++++++++++------
1 file changed, 15 insertions(+), 6 deletions(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index ff691f37462c..f28ea1254143 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -143,7 +143,8 @@ struct io_defer_entry {
 
 static bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
 					 struct io_uring_task *tctx,
-					 bool cancel_all);
+					 bool cancel_all,
+					 bool force_iopoll);
 
 static void io_queue_sqe(struct io_kiocb *req);
@@ -2898,7 +2899,12 @@ static __cold void io_ring_exit_work(struct work_struct *work)
 		if (ctx->flags & IORING_SETUP_DEFER_TASKRUN)
 			io_move_task_work_from_local(ctx);
 
-		while (io_uring_try_cancel_requests(ctx, NULL, true))
+		/*
+		 * Even if SQPOLL thread reaches this path, don't force
+		 * iopoll here, let the io_uring_cancel_generic handle
+		 * it.
+		 */
+		while (io_uring_try_cancel_requests(ctx, NULL, true, false))
 			cond_resched();

Just curious, will the sq_thread ever enter this io_ring_exit_work path?

 		if (ctx->sq_data) {
@@ -3066,7 +3072,8 @@ static __cold bool io_uring_try_cancel_iowq(struct io_ring_ctx *ctx)
 
 static __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
 						struct io_uring_task *tctx,
-						bool cancel_all)
+						bool cancel_all,
+						bool force_iopoll)
 {
 	struct io_task_cancel cancel = { .tctx = tctx, .all = cancel_all, };
 	enum io_wq_cancel cret;
@@ -3096,7 +3103,7 @@ static __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
 
 	/* SQPOLL thread does its own polling */
 	if ((!(ctx->flags & IORING_SETUP_SQPOLL) && cancel_all) ||
-	    (ctx->sq_data && ctx->sq_data->thread == current)) {
+	    force_iopoll) {
 		while (!wq_list_empty(&ctx->iopoll_list)) {
 			io_iopoll_try_reap_events(ctx);
 			ret = true;
@@ -3169,13 +3176,15 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
 					continue;
 				loop |= io_uring_try_cancel_requests(node->ctx,
 								     current->io_uring,
-								     cancel_all);
+								     cancel_all,
+								     false);
 			}
 		} else {
 			list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
 				loop |= io_uring_try_cancel_requests(ctx,
								     current->io_uring,
-								     cancel_all);
+								     cancel_all,
+								     true);
 		}
 
 		if (loop) {
--
2.43.0
Maybe you missed something, as Begunkov mentioned on the last version of
this patch:

io_uring_cancel_generic()
	WARN_ON_ONCE(sqd && sqd->thread != current);

This WARN_ON_ONCE will never be triggered, so you could remove it.
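
For reference, sqd is only ever non-NULL when io_uring_cancel_generic
is invoked by the SQPOLL thread itself, which is why the check cannot
fire (call sites condensed from memory, so treat as approximate):

	/* task exit/exec path: always passes sqd == NULL */
	void __io_uring_cancel(bool cancel_all)
	{
		io_uring_cancel_generic(cancel_all, NULL);
	}

	/* in io_sq_thread(): the SQPOLL thread cancels its own requests
	 * on exit, so sqd->thread == current holds here */
	io_uring_cancel_generic(true, sqd);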