[PATCH v3 13/21] nvme-fc: Use CCR to recover controller that hits an error
From: Mohamed Khalfella
Date: Fri Feb 13 2026 - 23:29:33 EST
A live NVMe controller that hits an error now moves to the FENCING
state instead of the RESETTING state. ctrl->fencing_work attempts CCR to
terminate inflight IOs. Regardless of whether the CCR operation succeeds
or fails, the controller is then transitioned to the RESETTING state to
continue the error recovery process.
Signed-off-by: Mohamed Khalfella <mkhalfella@xxxxxxxxxxxxxxx>
---
drivers/nvme/host/fc.c | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index e6ffaa19aba4..6ebabfb7e76d 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -166,6 +166,7 @@ struct nvme_fc_ctrl {
struct blk_mq_tag_set admin_tag_set;
struct blk_mq_tag_set tag_set;
+ struct work_struct fencing_work;
struct work_struct ioerr_work;
struct delayed_work connect_work;
@@ -1868,6 +1869,24 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
}
}
+/*
+ * Work handler for nvme_fc_ctrl.fencing_work.
+ *
+ * Calls nvme_fence_ctrl() on the embedded nvme_ctrl and logs when it
+ * returns non-zero. Whatever the outcome, the controller is then moved
+ * to FENCED and on to RESETTING; ioerr_work is queued on nvme_reset_wq
+ * only if the RESETTING transition is accepted.
+ */
+static void nvme_fc_fencing_work(struct work_struct *work)
+{
+	struct nvme_fc_ctrl *fctrl =
+		container_of(work, struct nvme_fc_ctrl, fencing_work);
+	struct nvme_ctrl *nctrl = &fctrl->ctrl;
+	unsigned long left = nvme_fence_ctrl(nctrl);
+
+	/* A non-zero result is only reported; recovery continues below. */
+	if (left)
+		dev_info(nctrl->device,
+			 "CCR failed, skipping time-based recovery\n");
+
+	/* Result of the FENCED transition is deliberately not checked. */
+	nvme_change_ctrl_state(nctrl, NVME_CTRL_FENCED);
+
+	/* Nothing to queue unless the move to RESETTING succeeded. */
+	if (!nvme_change_ctrl_state(nctrl, NVME_CTRL_RESETTING))
+		return;
+
+	queue_work(nvme_reset_wq, &fctrl->ioerr_work);
+}
+
static void
nvme_fc_ctrl_ioerr_work(struct work_struct *work)
{
@@ -1889,6 +1908,7 @@ nvme_fc_ctrl_ioerr_work(struct work_struct *work)
return;
}
+ flush_work(&ctrl->fencing_work);
nvme_fc_error_recovery(ctrl);
}
@@ -1915,6 +1935,14 @@ static void nvme_fc_start_ioerr_recovery(struct nvme_fc_ctrl *ctrl,
{
enum nvme_ctrl_state state;
+ if (nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_FENCING)) {
+ dev_warn(ctrl->ctrl.device,
+ "NVME-FC{%d}: starting controller fencing %s\n",
+ ctrl->cnum, errmsg);
+ queue_work(nvme_wq, &ctrl->fencing_work);
+ return;
+ }
+
if (nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING)) {
dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: starting error recovery %s\n",
ctrl->cnum, errmsg);
@@ -3322,6 +3350,7 @@ nvme_fc_reset_ctrl_work(struct work_struct *work)
struct nvme_fc_ctrl *ctrl =
container_of(work, struct nvme_fc_ctrl, ctrl.reset_work);
+ flush_work(&ctrl->fencing_work);
nvme_stop_ctrl(&ctrl->ctrl);
/* will block will waiting for io to terminate */
@@ -3497,6 +3526,7 @@ nvme_fc_alloc_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work);
INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
+ INIT_WORK(&ctrl->fencing_work, nvme_fc_fencing_work);
INIT_WORK(&ctrl->ioerr_work, nvme_fc_ctrl_ioerr_work);
spin_lock_init(&ctrl->lock);
--
2.52.0