Re: [RFC PATCH 03/14] nvmet: Implement CCR nvme command
From: Sagi Grimberg
Date: Thu Dec 25 2025 - 08:14:45 EST
On 26/11/2025 4:11, Mohamed Khalfella wrote:
Defined by TP8028 Rapid Path Failure Recovery, CCR (Cross-Controller
Reset) command is an nvme command the is issued to source controller by
initiator to reset impacted controller. Implement CCR command for linux
nvme target.
Signed-off-by: Mohamed Khalfella <mkhalfella@xxxxxxxxxxxxxxx>
---
drivers/nvme/target/admin-cmd.c | 79 +++++++++++++++++++++++++++++++++
drivers/nvme/target/core.c | 69 ++++++++++++++++++++++++++++
drivers/nvme/target/nvmet.h | 13 ++++++
include/linux/nvme.h | 23 ++++++++++
4 files changed, 184 insertions(+)
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index aaceb697e4d2..a55ca010d34f 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -376,7 +376,9 @@ static void nvmet_get_cmd_effects_admin(struct nvmet_ctrl *ctrl,
log->acs[nvme_admin_get_features] =
log->acs[nvme_admin_async_event] =
log->acs[nvme_admin_keep_alive] =
+ log->acs[nvme_admin_cross_ctrl_reset] =
cpu_to_le32(NVME_CMD_EFFECTS_CSUPP);
+
}
static void nvmet_get_cmd_effects_nvm(struct nvme_effects_log *log)
@@ -1615,6 +1617,80 @@ void nvmet_execute_keep_alive(struct nvmet_req *req)
nvmet_req_complete(req, status);
}
+void nvmet_execute_cross_ctrl_reset(struct nvmet_req *req)
+{
+ struct nvmet_ctrl *ictrl, *ctrl = req->sq->ctrl;
+ struct nvme_command *cmd = req->cmd;
+ struct nvmet_ccr *ccr, *new_ccr;
+ int ccr_active, ccr_total;
+ u16 cntlid, status = 0;
+
+ cntlid = le16_to_cpu(cmd->ccr.icid);
+ if (ctrl->cntlid == cntlid) {
+ req->error_loc =
+ offsetof(struct nvme_cross_ctrl_reset_cmd, icid);
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
+ goto out;
+ }
+
+ ictrl = nvmet_ctrl_find_get_ccr(ctrl->subsys, ctrl->hostnqn,
What does the 'i' stand for?
+ cmd->ccr.ciu, cntlid,
+ le64_to_cpu(cmd->ccr.cirn));
+ if (!ictrl) {
+ /* Immediate Reset Successful */
+ nvmet_set_result(req, 1);
+ status = NVME_SC_SUCCESS;
+ goto out;
+ }
+
+ new_ccr = kmalloc(sizeof(*ccr), GFP_KERNEL);
+ if (!new_ccr) {
+ status = NVME_SC_INTERNAL;
+ goto out_put_ctrl;
+ }
Allocating this later when you actually use it would probably simplify
error path.
+
+ ccr_total = ccr_active = 0;
+ mutex_lock(&ctrl->lock);
+ list_for_each_entry(ccr, &ctrl->ccrs, entry) {
+ if (ccr->ctrl == ictrl) {
+ status = NVME_SC_CCR_IN_PROGRESS | NVME_STATUS_DNR;
+ goto out_unlock;
+ }
+
+ ccr_total++;
+ if (ccr->ctrl)
+ ccr_active++;
+ }
+
+ if (ccr_active >= NVMF_CCR_LIMIT) {
+ status = NVME_SC_CCR_LIMIT_EXCEEDED;
+ goto out_unlock;
+ }
+ if (ccr_total >= NVMF_CCR_PER_PAGE) {
+ status = NVME_SC_CCR_LOGPAGE_FULL;
+ goto out_unlock;
+ }
+
+ new_ccr->ciu = cmd->ccr.ciu;
+ new_ccr->icid = cntlid;
+ new_ccr->ctrl = ictrl;
+ list_add_tail(&new_ccr->entry, &ctrl->ccrs);
+ mutex_unlock(&ctrl->lock);
+
+ nvmet_ctrl_fatal_error(ictrl);
Don't you need to wait for it to complete?
e.g. flush_work(&ictrl->fatal_err_work);
Or is that done async? will need to look downstream...