Re: [PATCH v7 05/11] cxl: Add CXL Device Reset helper
From: Richard Cheng
Date: Wed Jun 24 2026 - 10:34:35 EST
On Tue, Jun 23, 2026 at 03:24:47AM +0800, Srirangan Madhavan wrote:
> Add an internal CXL Device Reset helper for Type 2 functions that advertise
> CXL Reset in the CXL Device DVSEC. The helper disables CXL.cache, performs
> cache writeback when supported, initiates reset with Memory Clear disabled,
> waits for completion, and re-enables CXL.cache on exit.
>
> Leave the helper unregistered until range validation and reset-scope
> coordination are in place.
>
> Signed-off-by: Srirangan Madhavan <smadhavan@xxxxxxxxxx>
> ---
> drivers/cxl/core/reset.c | 221 ++++++++++++++++++++++++++++++++++
> include/cxl/cxl.h | 7 ++
> include/uapi/linux/pci_regs.h | 14 +++
> 3 files changed, 242 insertions(+)
>
> diff --git a/drivers/cxl/core/reset.c b/drivers/cxl/core/reset.c
> index fc52d3abdb5b..fdfcc9e825e0 100644
> --- a/drivers/cxl/core/reset.c
> +++ b/drivers/cxl/core/reset.c
> @@ -7,6 +7,8 @@
> #include <linux/export.h>
> #include <linux/io.h>
> #include <linux/ioport.h>
> +#include <linux/iommu.h>
> +#include <linux/jiffies.h>
> #include <linux/kernel.h>
> #include <linux/pci.h>
> #include <linux/slab.h>
> @@ -318,3 +320,222 @@ int pci_cxl_hdm_init(struct pci_dev *pdev)
> cxl_pci_hdm_unmap(pdev, &regs, &map);
> return rc;
> }
> +
> +/*
> + * CXL r4.0 sec 9.7.2 defines the reset completion timeout encodings.
> + * Sec 9.7.3 leaves config-space access behavior undefined for 100 ms after
> + * initiating CXL Reset, then limits software to CXL Status2 access until
> + * reset completion, timeout, or error.
> + */
> +#define CXL_RESET_RRS_WAIT_MS 100
> +#define CXL_RESET_STATUS_POLL_MS 20
> +static const u32 cxl_reset_timeout_ms[] = {
> + 10, 100, 1000, 10000, 100000,
> +};
> +
> +#define CXL_CACHE_WBI_TIMEOUT_US 100000
> +#define CXL_CACHE_WBI_POLL_US 100
> +
> +static int cxl_reset_dvsec(struct pci_dev *pdev)
> +{
> + int dvsec, rc;
> + u16 cap;
> +
> + dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
> + PCI_DVSEC_CXL_DEVICE);
> + if (!dvsec)
> + return -ENOTTY;
> +
> + rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CAP, &cap);
> + if (rc)
> + return pcibios_err_to_errno(rc);
> +
> + if ((cap & (PCI_DVSEC_CXL_CACHE_CAPABLE |
> + PCI_DVSEC_CXL_MEM_CAPABLE)) !=
> + (PCI_DVSEC_CXL_CACHE_CAPABLE | PCI_DVSEC_CXL_MEM_CAPABLE))
> + return -ENOTTY;
> +
> + if (!(cap & PCI_DVSEC_CXL_RST_CAPABLE))
> + return -ENOTTY;
> +
> + return dvsec;
> +}
> +
> +static int cxl_reset_update_ctrl2(struct pci_dev *pdev, int dvsec, u16 set,
> + u16 clear)
> +{
> + u16 ctrl2;
> + int rc;
> +
> + rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2, &ctrl2);
> + if (rc)
> + return pcibios_err_to_errno(rc);
> +
> + ctrl2 |= set;
> + ctrl2 &= ~clear;
> +
> + rc = pci_write_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2, ctrl2);
> + if (rc)
> + return pcibios_err_to_errno(rc);
> +
> + return 0;
> +}
> +
> +static int cxl_reset_enable_cache(struct pci_dev *pdev, int dvsec)
> +{
> + return cxl_reset_update_ctrl2(pdev, dvsec, 0,
> + PCI_DVSEC_CXL_DISABLE_CACHING);
> +}
> +
> +static int cxl_reset_disable_cache(struct pci_dev *pdev, int dvsec, u16 cap)
> +{
> + int remaining_us = CXL_CACHE_WBI_TIMEOUT_US;
> + u16 status2;
> + int rc, rc2;
> +
> + rc = cxl_reset_update_ctrl2(pdev, dvsec,
> + PCI_DVSEC_CXL_DISABLE_CACHING, 0);
> + if (rc)
> + return rc;
> +
> + if (!(cap & PCI_DVSEC_CXL_CACHE_WBI_CAPABLE))
> + return 0;
> +
> + rc = cxl_reset_update_ctrl2(pdev, dvsec,
> + PCI_DVSEC_CXL_INIT_CACHE_WBI, 0);
> + if (rc)
> + goto err_enable_cache;
> +
> + do {
> + usleep_range(CXL_CACHE_WBI_POLL_US, CXL_CACHE_WBI_POLL_US + 1);
> + remaining_us -= CXL_CACHE_WBI_POLL_US;
> +
> + rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_STATUS2,
> + &status2);
> + if (rc) {
> + rc = pcibios_err_to_errno(rc);
> + goto err_enable_cache;
> + }
> + } while (!(status2 & PCI_DVSEC_CXL_CACHE_INV) && remaining_us > 0);
> +
> + if (!(status2 & PCI_DVSEC_CXL_CACHE_INV)) {
> + rc = -ETIMEDOUT;
> + goto err_enable_cache;
> + }
> +
> + return 0;
> +
> +err_enable_cache:
> + /*
> + * DISABLE_CACHING can be rolled back here. INIT_CACHE_WBI is
> + * self-clearing on completion, so leave any in-flight writeback alone.
> + */
> + rc2 = cxl_reset_enable_cache(pdev, dvsec);
> + if (rc2)
> + pci_warn(pdev, "failed to re-enable CXL caching: %d\n", rc2);
> + return rc;
> +}
> +
> +static int cxl_reset_wait_done(struct pci_dev *pdev, int dvsec, u16 cap)
> +{
> + unsigned long deadline;
> + u32 timeout_ms;
> + u16 status2;
> + int idx, rc;
> +
> + idx = FIELD_GET(PCI_DVSEC_CXL_RST_TIMEOUT, cap);
> + if (idx >= ARRAY_SIZE(cxl_reset_timeout_ms)) {
> + int last = ARRAY_SIZE(cxl_reset_timeout_ms) - 1;
> +
> + pci_warn(pdev,
> + "unknown CXL reset timeout encoding %d; using %u ms\n",
> + idx, cxl_reset_timeout_ms[last]);
> + idx = last;
> + }
> +
> + timeout_ms = max_t(u32, cxl_reset_timeout_ms[idx],
> + CXL_RESET_RRS_WAIT_MS);
> + deadline = jiffies + msecs_to_jiffies(timeout_ms);
> + msleep(CXL_RESET_RRS_WAIT_MS);
> +
> + do {
> + rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_STATUS2,
> + &status2);
> + if (rc)
> + return pcibios_err_to_errno(rc);
> +
> + if (status2 & PCI_DVSEC_CXL_RST_ERR)
> + return -EIO;
> +
I think this returns -EIO for a reset that actually succeeds.

During the post-reset RRS window, the Status2 read comes back as 0xffff.
0xffff has bit 2 set, so it is interpreted as RST_ERR and we bail out on
the very first poll. Likewise, a transient config-read error is treated as
fatal instead of being retried.

I would suggest treating all-ones reads as "not ready" and continuing to
poll until the deadline, acting on RST_ERR and RST_DONE only when they come
from a valid read.
Best regards,
Richard Cheng
> + if (status2 & PCI_DVSEC_CXL_RST_DONE)
> + return 0;
> +
> + if (time_after_eq(jiffies, deadline))
> + return -ETIMEDOUT;
> +
> + msleep(CXL_RESET_STATUS_POLL_MS);
> + } while (true);
> +}
> +
> +static int cxl_reset_execute(struct pci_dev *pdev, int dvsec)
> +{
> + bool cache_disabled = false;
> + u16 cap;
> + int rc;
> +
> + rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CAP, &cap);
> + if (rc)
> + return pcibios_err_to_errno(rc);
> +
> + if (!pci_wait_for_pending_transaction(pdev))
> + pci_err(pdev, "timed out waiting for pending transactions\n");
> +
> + rc = pci_dev_reset_iommu_prepare(pdev);
> + if (rc) {
> + pci_err(pdev, "failed to stop IOMMU for CXL reset: %d\n", rc);
> + return rc;
> + }
> +
> + rc = cxl_reset_disable_cache(pdev, dvsec, cap);
> + if (rc)
> + goto out;
> + cache_disabled = true;
> +
> + rc = cxl_reset_update_ctrl2(pdev, dvsec, PCI_DVSEC_CXL_INIT_CXL_RST,
> + PCI_DVSEC_CXL_RST_MEM_CLR_EN);
> + if (rc)
> + goto out;
> +
> + rc = cxl_reset_wait_done(pdev, dvsec, cap);
> + if (rc)
> + goto out;
> +
> +out:
> + if (cache_disabled) {
> + int rc2;
> +
> + rc2 = cxl_reset_enable_cache(pdev, dvsec);
> + if (rc2 && rc)
> + pci_warn(pdev, "failed to re-enable CXL caching: %d\n",
> + rc2);
> + else if (rc2)
> + rc = rc2;
> + }
> +
> + pci_dev_reset_iommu_done(pdev);
> + return rc;
> +}
> +
> +int cxl_reset_function(struct pci_dev *pdev, bool probe)
> +{
> + int dvsec;
> +
> + dvsec = cxl_reset_dvsec(pdev);
> + if (dvsec < 0)
> + return dvsec;
> +
> + if (probe)
> + return 0;
> +
> + return cxl_reset_execute(pdev, dvsec);
> +}
> diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h
> index e3087b7517e8..1fe606f15733 100644
> --- a/include/cxl/cxl.h
> +++ b/include/cxl/cxl.h
> @@ -9,6 +9,7 @@
> #include <linux/node.h>
> #include <linux/ioport.h>
> #include <linux/range.h>
> +#include <linux/errno.h>
> #include <cxl/mailbox.h>
>
> /**
> @@ -137,11 +138,17 @@ struct cxl_hdm_info {
> int cxl_commit(struct cxl_decoder_settings *settings, void __iomem *hdm);
> #ifdef CONFIG_CXL_HDM
> int pci_cxl_hdm_init(struct pci_dev *pdev);
> +int cxl_reset_function(struct pci_dev *pdev, bool probe);
> #else
> static inline int pci_cxl_hdm_init(struct pci_dev *pdev)
> {
> return -ENOTTY;
> }
> +
> +static inline int cxl_reset_function(struct pci_dev *pdev, bool probe)
> +{
> + return -ENOTTY;
> +}
> #endif
>
> struct cxl_reg_map {
> diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
> index 14f634ab9350..194ae56b4404 100644
> --- a/include/uapi/linux/pci_regs.h
> +++ b/include/uapi/linux/pci_regs.h
> @@ -1349,10 +1349,24 @@
> /* CXL r4.0, 8.1.3: PCIe DVSEC for CXL Device */
> #define PCI_DVSEC_CXL_DEVICE 0
> #define PCI_DVSEC_CXL_CAP 0xA
> +#define PCI_DVSEC_CXL_CACHE_CAPABLE _BITUL(0)
> #define PCI_DVSEC_CXL_MEM_CAPABLE _BITUL(2)
> #define PCI_DVSEC_CXL_HDM_COUNT __GENMASK(5, 4)
> +#define PCI_DVSEC_CXL_CACHE_WBI_CAPABLE _BITUL(6)
> +#define PCI_DVSEC_CXL_RST_CAPABLE _BITUL(7)
> +#define PCI_DVSEC_CXL_RST_TIMEOUT __GENMASK(10, 8)
> +#define PCI_DVSEC_CXL_RST_MEM_CLR_CAPABLE _BITUL(11)
> #define PCI_DVSEC_CXL_CTRL 0xC
> #define PCI_DVSEC_CXL_MEM_ENABLE _BITUL(2)
> +#define PCI_DVSEC_CXL_CTRL2 0x10
> +#define PCI_DVSEC_CXL_DISABLE_CACHING _BITUL(0)
> +#define PCI_DVSEC_CXL_INIT_CACHE_WBI _BITUL(1)
> +#define PCI_DVSEC_CXL_INIT_CXL_RST _BITUL(2)
> +#define PCI_DVSEC_CXL_RST_MEM_CLR_EN _BITUL(3)
> +#define PCI_DVSEC_CXL_STATUS2 0x12
> +#define PCI_DVSEC_CXL_CACHE_INV _BITUL(0)
> +#define PCI_DVSEC_CXL_RST_DONE _BITUL(1)
> +#define PCI_DVSEC_CXL_RST_ERR _BITUL(2)
> #define PCI_DVSEC_CXL_RANGE_SIZE_HIGH(i) (0x18 + (i * 0x10))
> #define PCI_DVSEC_CXL_RANGE_SIZE_LOW(i) (0x1C + (i * 0x10))
> #define PCI_DVSEC_CXL_MEM_INFO_VALID _BITUL(0)
> --
> 2.43.0
>