Re: [PATCH v3 09/21] nvme: Implement cross-controller reset completion

From: Mohamed Khalfella

Date: Tue Feb 17 2026 - 13:29:08 EST


On Mon 2026-02-16 13:43:51 +0100, Hannes Reinecke wrote:
> On 2/14/26 05:25, Mohamed Khalfella wrote:
> > An nvme source controller that issues CCR command expects to receive an
> > NVME_AER_NOTICE_CCR_COMPLETED when pending CCR succeeds or fails. Add
> > sctrl->ccr_work to read NVME_LOG_CCR logpage and wakeup any thread
> > waiting on CCR completion.
> >
> > Signed-off-by: Mohamed Khalfella <mkhalfella@xxxxxxxxxxxxxxx>
> > ---
> > drivers/nvme/host/core.c | 49 +++++++++++++++++++++++++++++++++++++++-
> > drivers/nvme/host/nvme.h | 1 +
> > 2 files changed, 49 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> > index 765b1524b3ed..a9fcde1b411b 100644
> > --- a/drivers/nvme/host/core.c
> > +++ b/drivers/nvme/host/core.c
> > @@ -1916,7 +1916,8 @@ EXPORT_SYMBOL_GPL(nvme_set_queue_count);
> >
> > #define NVME_AEN_SUPPORTED \
> > (NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_FW_ACT | \
> > - NVME_AEN_CFG_ANA_CHANGE | NVME_AEN_CFG_DISC_CHANGE)
> > + NVME_AEN_CFG_ANA_CHANGE | NVME_AEN_CFG_CCR_COMPLETE | \
> > + NVME_AEN_CFG_DISC_CHANGE)
> >
> > static void nvme_enable_aen(struct nvme_ctrl *ctrl)
> > {
> > @@ -4880,6 +4881,47 @@ static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl)
> > kfree(log);
> > }
> >
> > +static void nvme_ccr_work(struct work_struct *work)
> > +{
> > + struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, ccr_work);
> > + struct nvme_ccr_entry *ccr;
> > + struct nvme_ccr_log_entry *entry;
> > + struct nvme_ccr_log *log;
> > + unsigned long flags;
> > + int ret, i;
> > +
> > + log = kmalloc(sizeof(*log), GFP_KERNEL);
> > + if (!log)
> > + return;
> > +
> > + ret = nvme_get_log(ctrl, 0, NVME_LOG_CCR, 0x01,
> > + 0x00, log, sizeof(*log), 0);
> > + if (ret)
> > + goto out;
> > +
> > + spin_lock_irqsave(&ctrl->lock, flags);
> > + for (i = 0; i < le16_to_cpu(log->ne); i++) {
> > + entry = &log->entries[i];
> > + if (entry->ccrs == NVME_CCR_STATUS_IN_PROGRESS)
> > + continue;
> > +
> > + list_for_each_entry(ccr, &ctrl->ccr_list, list) {
> > + struct nvme_ctrl *ictrl = ccr->ictrl;
> > +
> > + if (ictrl->cntlid != le16_to_cpu(entry->icid) ||
> > + ictrl->ciu != entry->ciu)
> > + continue;
> > +
> > + /* Complete matching entry */
> > + ccr->ccrs = entry->ccrs;
> > + complete(&ccr->complete);
> > + }
> > + }
> > + spin_unlock_irqrestore(&ctrl->lock, flags);
> > +out:
> > + kfree(log);
> > +}
> > +
> > static void nvme_fw_act_work(struct work_struct *work)
> > {
> > struct nvme_ctrl *ctrl = container_of(work,
> > @@ -4956,6 +4998,9 @@ static bool nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
> > case NVME_AER_NOTICE_DISC_CHANGED:
> > ctrl->aen_result = result;
> > break;
> > + case NVME_AER_NOTICE_CCR_COMPLETED:
> > + queue_work(nvme_wq, &ctrl->ccr_work);
> > + break;
> > default:
> > dev_warn(ctrl->device, "async event result %08x\n", result);
> > }
> > @@ -5145,6 +5190,7 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
> > nvme_stop_failfast_work(ctrl);
> > flush_work(&ctrl->async_event_work);
> > cancel_work_sync(&ctrl->fw_act_work);
> > + cancel_work_sync(&ctrl->ccr_work);
> > if (ctrl->ops->stop_ctrl)
> > ctrl->ops->stop_ctrl(ctrl);
> > }
> > @@ -5268,6 +5314,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
> > ctrl->quirks = quirks;
> > ctrl->numa_node = NUMA_NO_NODE;
> > INIT_WORK(&ctrl->scan_work, nvme_scan_work);
> > + INIT_WORK(&ctrl->ccr_work, nvme_ccr_work);
> > INIT_WORK(&ctrl->async_event_work, nvme_async_event_work);
> > INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
> > INIT_WORK(&ctrl->delete_work, nvme_delete_ctrl_work);
> > diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> > index f3ab9411cac5..af6a4e83053e 100644
> > --- a/drivers/nvme/host/nvme.h
> > +++ b/drivers/nvme/host/nvme.h
> > @@ -365,6 +365,7 @@ struct nvme_ctrl {
> > struct nvme_effects_log *effects;
> > struct xarray cels;
> > struct work_struct scan_work;
> > + struct work_struct ccr_work;
> > struct work_struct async_event_work;
> > struct delayed_work ka_work;
> > struct delayed_work failfast_work;
>
> We really would need some indicator whether 'ccr' is supported at all.

Why do we need this indicator, other than exporting it via sysfs?

> Using the number of available CCR commands would be an option, if though
> that would require us to keep two counters (one for the number of
> possible outstanding CCRs, and one for the number of actual outstanding
> CCRs.).

As mentioned above, ctrl->ccr_limit gives us the number of CCRs
available now. It is not a 100% reliable indicator of whether CCR is
supported, but it is enough to implement CCR. A second counter could
help us skip trying CCR if we know the impacted controller does not
support it.

Do you think it is worth it?

Iterating over controllers in the subsystem is not that bad IMO. This is
similar to the point raised by James Smart [1].

1- https://lore.kernel.org/all/05875e07-b908-425a-ba6f-5e060e03241e@xxxxxxxxx/