Re: [PATCH v5 5/7] cxl: Add CXL DVSEC reset sequence and flow orchestration
From: Dave Jiang
Date: Mon Mar 09 2026 - 20:26:29 EST
On 3/6/26 2:23 AM, smadhavan@xxxxxxxxxx wrote:
> From: Srirangan Madhavan <smadhavan@xxxxxxxxxx>
>
> cxl_dev_reset() implements the hardware reset sequence:
> optionally enable memory clear, initiate reset via
> CTRL2, wait for completion, and re-enable caching.
>
> cxl_do_reset() orchestrates the full reset flow:
> 1. CXL pre-reset: mem offlining and cache flush (when memdev present)
> 2. PCI save/disable: pci_dev_save_and_disable() automatically saves
> CXL DVSEC and HDM decoder state via PCI core hooks
> 3. Sibling coordination: save/disable CXL.cachemem sibling functions
> 4. Execute CXL DVSEC reset
> 5. Sibling restore: always runs to re-enable sibling functions
> 6. PCI restore: pci_dev_restore() automatically restores CXL state
>
> The CXL-specific DVSEC and HDM save/restore is handled
> by the PCI core's CXL save/restore infrastructure (drivers/pci/cxl.c).
>
> Signed-off-by: Srirangan Madhavan <smadhavan@xxxxxxxxxx>
> ---
> drivers/cxl/core/pci.c | 181 ++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 179 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
> index b6f10a2cb404..c758b3f1b3f9 100644
> --- a/drivers/cxl/core/pci.c
> +++ b/drivers/cxl/core/pci.c
> @@ -1078,7 +1078,7 @@ static int cxl_reset_collect_sibling(struct pci_dev *func, void *data)
> return 0;
> }
>
> -static void __maybe_unused cxl_pci_functions_reset_prepare(struct cxl_reset_context *ctx)
> +static void cxl_pci_functions_reset_prepare(struct cxl_reset_context *ctx)
> {
> struct pci_dev *pdev = ctx->target;
> struct cxl_reset_walk_ctx wctx;
> @@ -1103,7 +1103,7 @@ static void __maybe_unused cxl_pci_functions_reset_prepare(struct cxl_reset_cont
> }
> }
>
> -static void __maybe_unused cxl_pci_functions_reset_done(struct cxl_reset_context *ctx)
> +static void cxl_pci_functions_reset_done(struct cxl_reset_context *ctx)
> {
> int i;
>
> @@ -1116,3 +1116,180 @@ static void __maybe_unused cxl_pci_functions_reset_done(struct cxl_reset_context
> ctx->pci_functions = NULL;
> ctx->pci_func_count = 0;
> }
> +
> +/*
> + * CXL device reset execution
> + */
> +static int cxl_dev_reset(struct pci_dev *pdev, int dvsec)
> +{
> + static const u32 reset_timeout_ms[] = { 10, 100, 1000, 10000, 100000 };
> + u16 cap, ctrl2, status2;
> + u32 timeout_ms;
> + int rc, idx;
> +
> + if (!pci_wait_for_pending_transaction(pdev))
> + pci_err(pdev, "timed out waiting for pending transactions\n");
> +
> + rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CAP, &cap);
> + if (rc)
> + return rc;
> +
> + rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2, &ctrl2);
> + if (rc)
> + return rc;
> +
> + /*
> + * Disable caching and initiate cache writeback+invalidation if the
> + * device supports it. Poll for completion.
> + * Per CXL r3.2 section 9.6, software may use the cache size from
> + * DVSEC CXL Capability2 to compute a suitable timeout; we use a
> + * default of 10ms.
> + */
> + if (cap & PCI_DVSEC_CXL_CACHE_WBI_CAPABLE) {
> + u32 wbi_poll_us = 100;
> + s32 wbi_remaining_us = 10000;
> +
> + ctrl2 |= PCI_DVSEC_CXL_DISABLE_CACHING;
> + rc = pci_write_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2,
> + ctrl2);
> + if (rc)
> + return rc;
> +
> + ctrl2 |= PCI_DVSEC_CXL_INIT_CACHE_WBI;
> + rc = pci_write_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2,
> + ctrl2);
> + if (rc)
> + return rc;
> +
> + do {
> + usleep_range(wbi_poll_us, wbi_poll_us + 1);
> + wbi_remaining_us -= wbi_poll_us;
> + rc = pci_read_config_word(pdev,
> + dvsec + PCI_DVSEC_CXL_STATUS2,
> + &status2);
> + if (rc)
> + return rc;
> + } while (!(status2 & PCI_DVSEC_CXL_CACHE_INV) &&
> + wbi_remaining_us > 0);
> +
> + if (!(status2 & PCI_DVSEC_CXL_CACHE_INV)) {
> + pci_err(pdev, "CXL cache WB+I timed out\n");
> + return -ETIMEDOUT;
> + }
> + } else if (cap & PCI_DVSEC_CXL_CACHE_CAPABLE) {
> + ctrl2 |= PCI_DVSEC_CXL_DISABLE_CACHING;
> + rc = pci_write_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2,
> + ctrl2);
> + if (rc)
> + return rc;
> + }
> +
> + if (cap & PCI_DVSEC_CXL_RST_MEM_CLR_CAPABLE) {
> + rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2,
> + &ctrl2);
> + if (rc)
> + return rc;
> +
> + ctrl2 |= PCI_DVSEC_CXL_RST_MEM_CLR_EN;
> + rc = pci_write_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2,
> + ctrl2);
> + if (rc)
> + return rc;
> + }
> +
> + idx = FIELD_GET(PCI_DVSEC_CXL_RST_TIMEOUT, cap);
> + if (idx >= ARRAY_SIZE(reset_timeout_ms))
> + idx = ARRAY_SIZE(reset_timeout_ms) - 1;
> + timeout_ms = reset_timeout_ms[idx];
> +
> + rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2, &ctrl2);
> + if (rc)
> + return rc;
> +
> + ctrl2 |= PCI_DVSEC_CXL_INIT_CXL_RST;
> + rc = pci_write_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2, ctrl2);
> + if (rc)
> + return rc;
> +
> + msleep(timeout_ms);
> +
> + rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_STATUS2,
> + &status2);
> + if (rc)
> + return rc;
> +
> + if (status2 & PCI_DVSEC_CXL_RST_ERR) {
> + pci_err(pdev, "CXL reset error\n");
> + return -EIO;
> + }
> +
> + if (!(status2 & PCI_DVSEC_CXL_RST_DONE)) {
> + pci_err(pdev, "CXL reset timeout\n");
> + return -ETIMEDOUT;
> + }
> +
> + rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2, &ctrl2);
> + if (rc)
> + return rc;
> +
> + ctrl2 &= ~PCI_DVSEC_CXL_DISABLE_CACHING;
> + rc = pci_write_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2, ctrl2);
> + if (rc)
> + return rc;
> +
> + return 0;
> +}
> +
> +static int match_memdev_by_parent(struct device *dev, const void *parent)
> +{
> + return is_cxl_memdev(dev) && dev->parent == parent;
> +}
> +
> +static int cxl_do_reset(struct pci_dev *pdev)
> +{
> + struct cxl_reset_context ctx = { .target = pdev };
> + struct cxl_memdev *cxlmd = NULL;
> + struct device *memdev = NULL;
> + int dvsec, rc;
> +
> + dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
> + PCI_DVSEC_CXL_DEVICE);
> + if (!dvsec)
> + return -ENODEV;
> +
> + memdev = bus_find_device(&cxl_bus_type, NULL, &pdev->dev,
> + match_memdev_by_parent);
You can create a custom __free() function here with memdev.
> + if (memdev) {
> + cxlmd = to_cxl_memdev(memdev);
> + guard(device)(&cxlmd->dev);
> + }
> +
> + mutex_lock(&cxl_reset_mutex);
guard(mutex)(&cxl_reset_mutex)?
> + pci_dev_lock(pdev);
> +
> + if (cxlmd) {
> + rc = cxl_reset_prepare_memdev(cxlmd);
> + if (rc)
> + goto out_unlock;
> +
> + cxl_reset_flush_cpu_caches(cxlmd);
> + }
Can you move the discovery and touching of cxlmd to a helper function? Would that clean things up a bit here?
DJ
> +
> + pci_dev_save_and_disable(pdev);
> + cxl_pci_functions_reset_prepare(&ctx);
> +
> + rc = cxl_dev_reset(pdev, dvsec);
> +
> + cxl_pci_functions_reset_done(&ctx);
> +
> + pci_dev_restore(pdev);
> +
> +out_unlock:
> + pci_dev_unlock(pdev);
> + mutex_unlock(&cxl_reset_mutex);
> +
> + if (memdev)
> + put_device(memdev);
> +
> + return rc;
> +}
> --
> 2.43.0
>