[PATCH 1/2] ublk: reset per-IO canceled flag on each fetch

From: Uday Shankar

Date: Fri Apr 03 2026 - 23:26:01 EST


If a ublk server starts recovering devices but dies before issuing fetch
commands for all IOs, cancellation of the fetch commands that were
successfully issued may never complete. This is because the per-IO
canceled flag can remain set even after the fetch for that IO has been
submitted - the per-IO canceled flags for all IOs in a queue are reset
together only once all IOs for that queue have been fetched. So if a
nonempty proper subset of the IOs for a queue are fetched when the ublk
server dies, the IOs in that subset will never successfully be canceled,
as their canceled flags remain set, and this prevents ublk_cancel_cmd
from actually calling io_uring_cmd_done on the commands, despite the
fact that they are outstanding.

Fix this by resetting the per-IO canceled flag immediately when each IO
is fetched, instead of waiting for all IOs for the queue to be fetched
(which may never happen).

Signed-off-by: Uday Shankar <ushankar@xxxxxxxxxxxxxxx>
---
drivers/block/ublk_drv.c | 21 +++++++++++++--------
1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 3ba7da94d31499590a06a8b307ed151919a027cb..92dabeb820344107c9fadfae94396082b933d84e 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -2916,22 +2916,26 @@ static void ublk_stop_dev(struct ublk_device *ub)
ublk_cancel_dev(ub);
}

+static void ublk_reset_io_flags(struct ublk_queue *ubq, struct ublk_io *io)
+{
+ /* UBLK_IO_FLAG_CANCELED can be cleared now */
+ spin_lock(&ubq->cancel_lock);
+ io->flags &= ~UBLK_IO_FLAG_CANCELED;
+ spin_unlock(&ubq->cancel_lock);
+}
+
/* reset per-queue io flags */
static void ublk_queue_reset_io_flags(struct ublk_queue *ubq)
{
- int j;
-
- /* UBLK_IO_FLAG_CANCELED can be cleared now */
spin_lock(&ubq->cancel_lock);
- for (j = 0; j < ubq->q_depth; j++)
- ubq->ios[j].flags &= ~UBLK_IO_FLAG_CANCELED;
ubq->canceling = false;
spin_unlock(&ubq->cancel_lock);
ubq->fail_io = false;
}

/* device can only be started after all IOs are ready */
-static void ublk_mark_io_ready(struct ublk_device *ub, u16 q_id)
+static void ublk_mark_io_ready(struct ublk_device *ub, u16 q_id,
+ struct ublk_io *io)
__must_hold(&ub->mutex)
{
struct ublk_queue *ubq = ublk_get_queue(ub, q_id);
@@ -2940,6 +2944,7 @@ static void ublk_mark_io_ready(struct ublk_device *ub, u16 q_id)
ub->unprivileged_daemons = true;

ubq->nr_io_ready++;
+ ublk_reset_io_flags(ubq, io);

/* Check if this specific queue is now fully ready */
if (ublk_queue_ready(ubq)) {
@@ -3202,7 +3207,7 @@ static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_device *ub,
if (!ret)
ret = ublk_config_io_buf(ub, io, cmd, buf_addr, NULL);
if (!ret)
- ublk_mark_io_ready(ub, q_id);
+ ublk_mark_io_ready(ub, q_id, io);
mutex_unlock(&ub->mutex);
return ret;
}
@@ -3610,7 +3615,7 @@ static int ublk_batch_prep_io(struct ublk_queue *ubq,
ublk_io_unlock(io);

if (!ret)
- ublk_mark_io_ready(data->ub, ubq->q_id);
+ ublk_mark_io_ready(data->ub, ubq->q_id, io);

return ret;
}

--
2.34.1