[PATCH] nvme: bound the freeze drain in passthrough commands
From: Chao Shi
Date: Wed May 27 2026 - 02:00:20 EST
nvme_passthru_start() drains in-flight I/O with the unbounded
nvme_wait_freeze() before submitting a command that has command-set
effects (Format NVM, Sanitize, Namespace Management, vendor-unique).
If a completion is silently dropped or the device hangs, the calling
task blocks forever while holding ctrl->scan_lock and
ctrl->subsys->lock, so any concurrent open/close/passthru on the same
controller blocks behind it and produces hung-task reports:
INFO: task syz-executor:NNNN blocked for more than 123 seconds.
nvme_wait_freeze+0x82/0x100
nvme_passthru_start drivers/nvme/host/core.c:1249 [inline]
nvme_submit_user_cmd+0x1ee/0x3d0 drivers/nvme/host/ioctl.c:189
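The other freeze-drain sites (pci shutdown, tcp/rdma reset) already
bound the wait with nvme_wait_freeze_timeout(NVME_IO_TIMEOUT). Do the
same here: on timeout, unfreeze the controller and the multipath
queues, release both locks, and fail the command with -EBUSY
(NVME_SC_INTERNAL on the nvmet path) instead of submitting it. To
report the failure, nvme_passthru_start() now returns 0 or a negative
errno and passes the command effects back through a new output
parameter. The preceding nvme_mpath_wait_freeze() call is not changed
by this patch.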
Found by FuzzNvme (Syzkaller with the FEMU fuzzing framework).
Acked-by: Sungwoo Kim <iam@xxxxxxxxxxxx>
Acked-by: Dave Tian <daveti@xxxxxxxxxx>
Acked-by: Weidong Zhu <weizhu@xxxxxxx>
Signed-off-by: Chao Shi <coshi036@xxxxxxxxx>
---
drivers/nvme/host/core.c | 26 ++++++++++++++++++++------
drivers/nvme/host/ioctl.c | 7 ++++++-
drivers/nvme/host/nvme.h | 3 ++-
drivers/nvme/target/passthru.c | 7 ++++++-
4 files changed, 34 insertions(+), 9 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 7bf228df6001..575f98b9a6cc 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1232,23 +1232,37 @@ u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode)
}
EXPORT_SYMBOL_NS_GPL(nvme_command_effects, "NVME_TARGET_PASSTHRU");
-u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode)
+int nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode,
+ u32 *effects)
{
- u32 effects = nvme_command_effects(ctrl, ns, opcode);
+ *effects = nvme_command_effects(ctrl, ns, opcode);
/*
* For simplicity, IO to all namespaces is quiesced even if the command
- * effects say only one namespace is affected.
+ * effects say only one namespace is affected. Bound the drain wait so
+ * a stuck I/O cannot wedge the passthrough caller (and any task on the
+ * scan_lock or subsys lock) indefinitely; the other in-tree callers of
+ * the freeze drain (pci shutdown, tcp/rdma reset) already use this same
+ * NVME_IO_TIMEOUT bound.
*/
- if (effects & NVME_CMD_EFFECTS_CSE_MASK) {
+ if (*effects & NVME_CMD_EFFECTS_CSE_MASK) {
mutex_lock(&ctrl->scan_lock);
mutex_lock(&ctrl->subsys->lock);
nvme_mpath_start_freeze(ctrl->subsys);
nvme_mpath_wait_freeze(ctrl->subsys);
nvme_start_freeze(ctrl);
- nvme_wait_freeze(ctrl);
+ if (!nvme_wait_freeze_timeout(ctrl, NVME_IO_TIMEOUT)) {
+ dev_warn(ctrl->device,
+ "I/O did not drain in %u seconds; aborting passthrough\n",
+ nvme_io_timeout);
+ nvme_unfreeze(ctrl);
+ nvme_mpath_unfreeze(ctrl->subsys);
+ mutex_unlock(&ctrl->subsys->lock);
+ mutex_unlock(&ctrl->scan_lock);
+ return -EBUSY;
+ }
}
- return effects;
+ return 0;
}
EXPORT_SYMBOL_NS_GPL(nvme_passthru_start, "NVME_TARGET_PASSTHRU");
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index a9c097dacad6..762458a23b38 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -186,7 +186,12 @@ static int nvme_submit_user_cmd(struct request_queue *q,
bio = req->bio;
ctrl = nvme_req(req)->ctrl;
- effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode);
+ ret = nvme_passthru_start(ctrl, ns, cmd->common.opcode, &effects);
+ if (ret) {
+ if (bio)
+ blk_rq_unmap_user(bio);
+ goto out_free_req;
+ }
ret = nvme_execute_rq(req, false);
if (result)
*result = le64_to_cpu(nvme_req(req)->result.u64);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 9a5f28c5103c..665d75de044e 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -1211,7 +1211,8 @@ static inline void nvme_auth_revoke_tls_key(struct nvme_ctrl *ctrl) {};
u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
u8 opcode);
-u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode);
+int nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode,
+ u32 *effects);
int nvme_execute_rq(struct request *rq, bool at_head);
void nvme_passthru_end(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u32 effects,
struct nvme_command *cmd, int status);
diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c
index 67c423a8b052..7b97bfc1ace6 100644
--- a/drivers/nvme/target/passthru.c
+++ b/drivers/nvme/target/passthru.c
@@ -220,7 +220,12 @@ static void nvmet_passthru_execute_cmd_work(struct work_struct *w)
u32 effects;
int status;
- effects = nvme_passthru_start(ctrl, ns, req->cmd->common.opcode);
+ status = nvme_passthru_start(ctrl, ns, req->cmd->common.opcode, &effects);
+ if (status) {
+ nvmet_req_complete(req, NVME_SC_INTERNAL);
+ blk_mq_free_request(rq);
+ return;
+ }
status = nvme_execute_rq(rq, false);
if (status == NVME_SC_SUCCESS &&
req->cmd->common.opcode == nvme_admin_identify) {
--
2.43.0