Re: [PATCH v6 5/9] cxl/pci: Add CXL DVSEC reset helper

From: Cheatham, Benjamin

Date: Tue Jun 02 2026 - 16:37:17 EST


On 5/28/2026 3:31 AM, Srirangan Madhavan wrote:
> Add a helper to execute CXL Reset through the CXL Device DVSEC. The
> helper verifies reset capability, waits for pending PCIe transactions,
> disables CXL.cache, optionally initiates cache writeback and invalidation,
> and then starts CXL Reset through the DVSEC Control2 register.
>
> Block IOMMU traffic while reset is active, then restore IOMMU
> translations after reset completes.
>
> Wait for the DVSEC reset timeout before checking reset completion, and
> report reset error or timeout status from the DVSEC Status2 register. Add
> the CXL Device DVSEC reset and cache control definitions needed by the
> helper.
>
> Signed-off-by: Srirangan Madhavan <smadhavan@xxxxxxxxxx>
> ---
> drivers/cxl/core/pci.c | 185 ++++++++++++++++++++++++++++++++++
> include/uapi/linux/pci_regs.h | 13 +++
> 2 files changed, 198 insertions(+)
>
> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
> index 01effbb4e7cd..1dd880f5a333 100644
> --- a/drivers/cxl/core/pci.c
> +++ b/drivers/cxl/core/pci.c
> @@ -20,6 +20,9 @@
> #define CXL_RESET_MAX_FUNCTIONS 256
> #define CXL_RESET_FUNCTION_MAP_REGS (CXL_RESET_MAX_FUNCTIONS / 32)
> #define CXL_RESET_SIBLINGS_INIT 8
> +#define CXL_RESET_CACHE_WBI_POLL_US 100
> +#define CXL_RESET_CACHE_WBI_TIMEOUT_US (100 * USEC_PER_MSEC)
> +#define CXL_RESET_MIN_QUIET_MS 100
>
> /**
> * DOC: cxl core pci
> @@ -1303,3 +1306,185 @@ cxl_pci_functions_reset_prepare(struct cxl_reset_context *ctx)
> cxl_pci_functions_reset_done(ctx);
> return rc;
> }
> +
> +static int cxl_reset_update_ctrl2(struct pci_dev *pdev, int dvsec, u16 set,
> + u16 clear)

I'd change to cxl_dvsec_update_ctrl2(), see next comment.

> +{
> + u16 ctrl2;
> + int rc;
> +
> + rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2, &ctrl2);
> + if (rc)
> + return rc;
> +
> + ctrl2 &= ~clear;
> + ctrl2 |= set;
> +
> + return pci_write_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2, ctrl2);
> +}
> +
> +static int cxl_reset_wait_cache_inv(struct pci_dev *pdev, int dvsec)
> +{
> + int remaining_us = CXL_RESET_CACHE_WBI_TIMEOUT_US;
> + u16 status2;
> + int rc;
> +
> + do {
> + usleep_range(CXL_RESET_CACHE_WBI_POLL_US,
> + CXL_RESET_CACHE_WBI_POLL_US + 1);
> + remaining_us -= CXL_RESET_CACHE_WBI_POLL_US;
> +
> + rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_STATUS2,
> + &status2);
> + if (rc)
> + return rc;
> +
> + if (status2 & PCI_DVSEC_CXL_CACHE_INV)
> + return 0;
> + } while (remaining_us > 0);
> +
> + pci_err(pdev, "CXL cache WB+I timed out\n");
> + return -ETIMEDOUT;
> +}
> +
> +static int cxl_reset_enable_cache(struct pci_dev *pdev, int dvsec, u16 cap)
> +{
> + if (!(cap & PCI_DVSEC_CXL_CACHE_CAPABLE))
> + return 0;
> +
> + return cxl_reset_update_ctrl2(pdev, dvsec, 0,
> + PCI_DVSEC_CXL_DISABLE_CACHING);
> +}
> +
> +static int cxl_reset_disable_cache(struct pci_dev *pdev, int dvsec, u16 cap)

I would remove the reset portion of the names of these three functions. There's nothing
reset specific about them and it'd be nice to have general functions for when/if CXL.cache
device support comes to the core.

Also would like to see the RESET part of the WBI #defines dropped for the same reasoning.

> +{
> + int rc;
> +
> + if (!(cap & PCI_DVSEC_CXL_CACHE_CAPABLE))
> + return 0;
> +
> + rc = cxl_reset_update_ctrl2(pdev, dvsec,
> + PCI_DVSEC_CXL_DISABLE_CACHING, 0);
> + if (rc)
> + return rc;
> +
> + if (!(cap & PCI_DVSEC_CXL_CACHE_WBI_CAPABLE))
> + return 0;
> +
> + rc = cxl_reset_update_ctrl2(pdev, dvsec,
> + PCI_DVSEC_CXL_INIT_CACHE_WBI, 0);
> + if (rc)
> + goto err_enable_cache;

I think you can probably return here. I would be surprised if the pci_write/read_config()
updated the register with the new value while also returning an error.

> +
> + rc = cxl_reset_wait_cache_inv(pdev, dvsec);
> + if (rc)
> + goto err_enable_cache;
> +
> + return 0;

With above change you can drop the goto and do:
rc = cxl_reset_wait_cache_inv(pdev, dvsec);
if (rc)
dev_warn(<descriptive error message>);

return rc;

> +
> +err_enable_cache:
> + /*
> + * Best effort rollback: preserve the original WB+I failure even if
> + * re-enabling CXL.cache also fails.
> + */
> + cxl_reset_enable_cache(pdev, dvsec, cap);
> + return rc;
> +}
> +
> +static int cxl_reset_wait_done(struct pci_dev *pdev, int dvsec, u16 cap)
> +{
> + static const u32 reset_timeout_ms[] = { 10, 100, 1000, 10000, 100000 };
> + u32 timeout_ms;
> + u16 status2;
> + int rc, idx;
> +
> + idx = FIELD_GET(PCI_DVSEC_CXL_RST_TIMEOUT, cap);
> + if (idx >= ARRAY_SIZE(reset_timeout_ms))
> + idx = ARRAY_SIZE(reset_timeout_ms) - 1;

I'd put a dev_dbg() here just in case someone is wondering why their timeout value
is being shortened. Would also be nice for vendors/users to see that their hardware is
being programmed with a value that's (probably) too long.

> + timeout_ms = reset_timeout_ms[idx];
> +
> + msleep(max_t(u32, timeout_ms, CXL_RESET_MIN_QUIET_MS));
> +
> + rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_STATUS2,
> + &status2);
> + if (rc)
> + return rc;
> +
> + if (status2 & PCI_DVSEC_CXL_RST_ERR) {
> + pci_err(pdev, "CXL reset error\n");
> + return -EIO;
> + }
> +
> + if (!(status2 & PCI_DVSEC_CXL_RST_DONE)) {
> + pci_err(pdev, "CXL reset timed out\n");
> + return -ETIMEDOUT;
> + }
> +
> + return 0;
> +}
> +
> +static int __maybe_unused cxl_dev_reset(struct pci_dev *pdev, bool mem_clear)
> +{
> + int dvsec, rc;
> + u16 ctrl2_clear = 0;
> + u16 cap;
> +
> + dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
> + PCI_DVSEC_CXL_DEVICE);
> + if (!dvsec)
> + return -ENODEV;
> +
> + rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CAP, &cap);
> + if (rc)
> + return rc;
> +
> + if (!(cap & PCI_DVSEC_CXL_RST_CAPABLE))
> + return -EOPNOTSUPP;
> +
> + if (mem_clear && !(cap & PCI_DVSEC_CXL_RST_MEM_CLR_CAPABLE))
> + return -EOPNOTSUPP;
> +
> + if (!pci_wait_for_pending_transaction(pdev))
> + pci_err(pdev, "timed out waiting for pending transactions\n");
> +
> + rc = pci_dev_reset_iommu_prepare(pdev);
> + if (rc) {
> + pci_err(pdev, "failed to block IOMMU for CXL reset: %d\n",
> + rc);
> + return rc;
> + }
> +
> + rc = cxl_reset_disable_cache(pdev, dvsec, cap);
> + if (rc)
> + goto out_iommu;
> + if (cap & PCI_DVSEC_CXL_CACHE_CAPABLE)
> + ctrl2_clear |= PCI_DVSEC_CXL_DISABLE_CACHING;
> +
> + if (mem_clear) {
> + rc = cxl_reset_update_ctrl2(pdev, dvsec,
> + PCI_DVSEC_CXL_RST_MEM_CLR_EN, 0);
> + if (rc)
> + goto out_ctrl2;
> + ctrl2_clear |= PCI_DVSEC_CXL_RST_MEM_CLR_EN;
> + }
> +
> + rc = cxl_reset_update_ctrl2(pdev, dvsec,
> + PCI_DVSEC_CXL_INIT_CXL_RST, 0);
> + if (rc)
> + goto out_ctrl2;
> +
> + rc = cxl_reset_wait_done(pdev, dvsec, cap);
> + if (rc)
> + goto out_iommu;
> +
> + rc = cxl_reset_update_ctrl2(pdev, dvsec, 0,
> + PCI_DVSEC_CXL_DISABLE_CACHING);
> +
> +out_ctrl2:
> + if (rc && ctrl2_clear)
> + cxl_reset_update_ctrl2(pdev, dvsec, 0, ctrl2_clear);
> +
> +out_iommu:
> + pci_dev_reset_iommu_done(pdev);
> + return rc;
> +}
> diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
> index fa1fcd26af01..7fc1d34fcce7 100644
> --- a/include/uapi/linux/pci_regs.h
> +++ b/include/uapi/linux/pci_regs.h
> @@ -1352,8 +1352,21 @@
> #define PCI_DVSEC_CXL_CACHE_CAPABLE _BITUL(0)
> #define PCI_DVSEC_CXL_MEM_CAPABLE _BITUL(2)
> #define PCI_DVSEC_CXL_HDM_COUNT __GENMASK(5, 4)
> +#define PCI_DVSEC_CXL_CACHE_WBI_CAPABLE _BITUL(6)
> +#define PCI_DVSEC_CXL_RST_CAPABLE _BITUL(7)
> +#define PCI_DVSEC_CXL_RST_TIMEOUT __GENMASK(10, 8)
> +#define PCI_DVSEC_CXL_RST_MEM_CLR_CAPABLE _BITUL(11)
> #define PCI_DVSEC_CXL_CTRL 0xC
> #define PCI_DVSEC_CXL_MEM_ENABLE _BITUL(2)
> +#define PCI_DVSEC_CXL_CTRL2 0x10
> +#define PCI_DVSEC_CXL_DISABLE_CACHING _BITUL(0)
> +#define PCI_DVSEC_CXL_INIT_CACHE_WBI _BITUL(1)
> +#define PCI_DVSEC_CXL_INIT_CXL_RST _BITUL(2)
> +#define PCI_DVSEC_CXL_RST_MEM_CLR_EN _BITUL(3)
> +#define PCI_DVSEC_CXL_STATUS2 0x12
> +#define PCI_DVSEC_CXL_CACHE_INV _BITUL(0)
> +#define PCI_DVSEC_CXL_RST_DONE _BITUL(1)
> +#define PCI_DVSEC_CXL_RST_ERR _BITUL(2)
> #define PCI_DVSEC_CXL_RANGE_SIZE_HIGH(i) (0x18 + (i * 0x10))
> #define PCI_DVSEC_CXL_RANGE_SIZE_LOW(i) (0x1C + (i * 0x10))
> #define PCI_DVSEC_CXL_MEM_INFO_VALID _BITUL(0)