RE: [PATCH 2/2] nvme-pci: handle persistent internal error AER from NVMe controller

From: Michael Kelley (LINUX)
Date: Wed Jun 01 2022 - 11:57:12 EST


From: Christoph Hellwig <hch@xxxxxx>
>
> This really belongs into common code. See the untested patch below
> of how I'd do it. The nvme_should_reset would be a separate prep
> patch again.

Indeed, that makes sense. I had missed that execution gets from
the common code back to the PCI-specific code via the reset_work
function, so unnecessarily did everything in the pci.c.

If there is a persistent error that does a controller reset, it looks
like we should *not* queue async_event_work at the end of
nvme_complete_async_event(). The controller reset will
submit an AER on the admin queue, and so presumably
we don't want nvme_async_event_work() to also try to submit
another AER, which may or may not succeed depending on the
timing of when the controller state shows LIVE again.
Agreed?

Michael

>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 72f7c955c7078..b8b8e9ee04120 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -171,6 +171,24 @@ static inline void nvme_stop_failfast_work(struct nvme_ctrl
> *ctrl)
> clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
> }
>
> +bool nvme_should_reset(struct nvme_ctrl *ctrl, u32 csts)
> +{
> + /* If there is a reset/reinit ongoing, we shouldn't reset again. */
> + switch (ctrl->state) {
> + case NVME_CTRL_RESETTING:
> + case NVME_CTRL_CONNECTING:
> + return false;
> + default:
> + break;
> + }
> +
> + /*
> + * We shouldn't reset unless the controller is on fatal error state or
> + * if we lost the communication with it.
> + */
> + return (csts & NVME_CSTS_CFS) ||
> + (ctrl->subsystem && (csts & NVME_CSTS_NSSRO));
> +}
>
> int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
> {
> @@ -4537,24 +4555,41 @@ static void nvme_handle_aen_notice(struct nvme_ctrl
> *ctrl, u32 result)
> }
> }
>
> +static void nvme_handle_aen_persistent_error(struct nvme_ctrl *ctrl)
> +{
> + u32 csts;
> +
> + if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts) < 0 ||
> + nvme_should_reset(ctrl, csts)) {
> + dev_warn(ctrl->device, "resetting due to AEN\n");
> + nvme_reset_ctrl(ctrl);
> + }
> +}
> +
> void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
> volatile union nvme_result *res)
> {
> u32 result = le32_to_cpu(res->u32);
> - u32 aer_type = result & 0x07;
> + u32 aen_type = result & 0x07;
> + u32 aen_subtype = (result & 0xff00) >> 8;
>
> if (le16_to_cpu(status) >> 1 != NVME_SC_SUCCESS)
> return;
>
> - switch (aer_type) {
> + switch (aen_type) {
> case NVME_AER_NOTICE:
> nvme_handle_aen_notice(ctrl, result);
> break;
> case NVME_AER_ERROR:
> + if (aen_subtype == NVME_AER_ERROR_PERSIST_INT_ERR) {
> + nvme_handle_aen_persistent_error(ctrl);
> + break;
> + }
> + fallthrough;
> case NVME_AER_SMART:
> case NVME_AER_CSS:
> case NVME_AER_VS:
> - trace_nvme_async_event(ctrl, aer_type);
> + trace_nvme_async_event(ctrl, aen_type);
> ctrl->aen_result = result;
> break;
> default:
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index 9b72b6ecf33c9..0d7e9ac52d25a 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -762,6 +762,7 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned int fid,
> u32 *result);
> int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
> void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
> +bool nvme_should_reset(struct nvme_ctrl *ctrl, u32 csts);
> int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
> int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl);
> int nvme_try_sched_reset(struct nvme_ctrl *ctrl);
> diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
> index 5a98a7de09642..c57023d98f8f3 100644
> --- a/drivers/nvme/host/pci.c
> +++ b/drivers/nvme/host/pci.c
> @@ -1293,31 +1293,6 @@ static void abort_endio(struct request *req, blk_status_t
> error)
> blk_mq_free_request(req);
> }
>
> -static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
> -{
> - /* If true, indicates loss of adapter communication, possibly by a
> - * NVMe Subsystem reset.
> - */
> - bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
> -
> - /* If there is a reset/reinit ongoing, we shouldn't reset again. */
> - switch (dev->ctrl.state) {
> - case NVME_CTRL_RESETTING:
> - case NVME_CTRL_CONNECTING:
> - return false;
> - default:
> - break;
> - }
> -
> - /* We shouldn't reset unless the controller is on fatal error state
> - * _or_ if we lost the communication with it.
> - */
> - if (!(csts & NVME_CSTS_CFS) && !nssro)
> - return false;
> -
> - return true;
> -}
> -
> static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
> {
> /* Read a config register to help see what died. */
> @@ -1355,7 +1330,7 @@ static enum blk_eh_timer_return nvme_timeout(struct
> request *req, bool reserved)
> /*
> * Reset immediately if the controller is failed
> */
> - if (nvme_should_reset(dev, csts)) {
> + if (nvme_should_reset(&dev->ctrl, csts)) {
> nvme_warn_reset(dev, csts);
> nvme_dev_disable(dev, false);
> nvme_reset_ctrl(&dev->ctrl);
> diff --git a/include/linux/nvme.h b/include/linux/nvme.h
> index 29ec3e3481ff6..8ced2439f1f34 100644
> --- a/include/linux/nvme.h
> +++ b/include/linux/nvme.h
> @@ -711,6 +711,10 @@ enum {
> NVME_AER_VS = 7,
> };
>
> +enum {
> + NVME_AER_ERROR_PERSIST_INT_ERR = 0x03,
> +};
> +
> enum {
> NVME_AER_NOTICE_NS_CHANGED = 0x00,
> NVME_AER_NOTICE_FW_ACT_STARTING = 0x01,