Re: [PATCH v2] cxl/port: Fix missing port lock in cxl_dport_remove()
From: Dave Jiang
Date: Mon Jun 08 2026 - 20:38:53 EST
On 6/8/26 3:35 PM, Terry Bowman wrote:
> xa_erase() in cxl_dport_remove() runs without the port device lock,
> creating a race with any caller that does xa_load() on port->dports
> and then dereferences the returned dport pointer. A concurrent
> cxl_dport_remove() can erase and free the dport between the xa_load()
> and the caller acquiring the port lock, causing a use-after-free.
>
> For non-root ports the port lock is already held by the caller on two
> paths:
>
> 1. Driver unbind: devres_release_all() is called from
> __device_release_driver() which holds port->dev.mutex.
>
> 2. Dynamic endpoint removal: cxl_detach_ep() takes the port lock
> before calling del_dports() -> del_dport() -> devres_release_group(),
> which synchronously runs cxl_dport_remove().
>
> Use cond_cxl_root_lock/unlock(), which only acquires the port lock when
> the port is a root port and the lock is therefore not already held.
> This matches the pattern used in __devm_cxl_add_dport() for the same
> reason.
>
> The write-side fix to cxl_dport_remove() is necessary but not
> sufficient. Callers that obtain a dport pointer via cxl_mem_find_port()
> use a lockless xa_load() and must not dereference that pointer until a
> lock that excludes free_dport()/kfree() is held.
>
> For root ports, dport_to_host() returns uport_dev, so all three devres
> actions (free_dport, cxl_dport_remove, cxl_dport_unlink) are registered
> on uport_dev. __device_release_driver() holds uport_dev->mutex for the
> full teardown sequence including kfree(dport). Holding uport_dev->mutex
> on the read side therefore excludes concurrent dport freeing.
>
> Fix rcd_pcie_cap_emit() by passing NULL to cxl_mem_find_port() to avoid
> capturing a lockless dport pointer, then re-fetching dport inside the
> uport_dev guard via cxl_find_dport_by_dev(). The previous guard on
> root->dev was wrong: cxl_dport_remove() releases root->dev before
> free_dport() runs, so root->dev does not protect against concurrent
> kfree(dport).
>
> Fix cxl_mem_probe() similarly: pass NULL to cxl_mem_find_port(), then
> re-fetch dport inside scoped_guard(device, &parent_port->dev) for the
> VH path, and re-fetch again inside scoped_guard(device, uport_dev) for
> the RCH path. This closes both the TOCTOU window between the lockless
> xa_load() and the guard acquisition, and the window between the two
> sequential guards in the RCH path where a concurrent surprise removal
> could free dport before devm_cxl_add_endpoint() dereferences it.
>
> Reported-by: Sashiko
> Fixes: 391785859e7e ("cxl/port: Move dport tracking to an xarray")
> Link: https://lore.kernel.org/linux-cxl/20260505173029.2718246-1-terry.bowman@xxxxxxx/
> Signed-off-by: Terry Bowman <terry.bowman@xxxxxxx>
> Reviewed-by: Ben Cheatham <Benjamin.Cheatham@xxxxxxx>
> ---
> drivers/cxl/core/port.c | 10 +++++++
> drivers/cxl/mem.c | 65 +++++++++++++++++++++++++++++++----------
> drivers/cxl/pci.c | 17 +++++++----
> 3 files changed, 72 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
> index c5aacd7054f1..0b8f144596e8 100644
> --- a/drivers/cxl/core/port.c
> +++ b/drivers/cxl/core/port.c
> @@ -1092,8 +1092,18 @@ static void cxl_dport_remove(void *data)
> struct cxl_dport *dport = data;
> struct cxl_port *port = dport->port;
>
> + /*
> + * For non-root ports the port lock is already held by the caller
> + * via devres_release_all() during driver unbind, which holds
> + * port->dev.mutex throughout. Acquiring it again unconditionally
> + * would deadlock. Use cond_cxl_root_lock() which only acquires
> + * when the port is a root port and the lock is therefore not yet
> + * held.
> + */
> + cond_cxl_root_lock(port);
> port->nr_dports--;
> xa_erase(&port->dports, (unsigned long) dport->dport_dev);
> + cond_cxl_root_unlock(port);
> put_device(dport->dport_dev);
> }
>
> diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
> index fcffe24dcb42..345b56f215ff 100644
> --- a/drivers/cxl/mem.c
> +++ b/drivers/cxl/mem.c
> @@ -70,9 +70,9 @@ static int cxl_mem_probe(struct device *dev)
> struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
> struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
> struct cxl_dev_state *cxlds = cxlmd->cxlds;
> - struct device *endpoint_parent;
> struct cxl_dport *dport;
> struct dentry *dentry;
> + bool rch = false;
> int rc;
>
> if (!cxlds->media_ready)
> @@ -107,8 +107,7 @@ static int cxl_mem_probe(struct device *dev)
> if (rc)
> return rc;
>
> - struct cxl_port *parent_port __free(put_cxl_port) =
> - cxl_mem_find_port(cxlmd, &dport);
> + struct cxl_port *parent_port __free(put_cxl_port) = cxl_mem_find_port(cxlmd, NULL);
> if (!parent_port) {
> dev_err(dev, "CXL port topology not found\n");
> return -ENXIO;
> @@ -123,21 +122,57 @@ static int cxl_mem_probe(struct device *dev)
> }
> }
>
> - if (dport->rch)
> - endpoint_parent = parent_port->uport_dev;
> - else
> - endpoint_parent = &parent_port->dev;
> -
> - scoped_guard(device, endpoint_parent) {
> - if (!endpoint_parent->driver) {
> - dev_err(dev, "CXL port topology %s not enabled\n",
> - dev_name(endpoint_parent));
> + scoped_guard(device, &parent_port->dev) {
> + /*
> + * Re-fetch dport under the port lock to close the TOCTOU
> + * window between cxl_mem_find_port()'s lockless xa_load() and
> + * this guard acquisition. A concurrent surprise removal can
> + * free the dport in that window.
> + */
> + dport = cxl_find_dport_by_dev(parent_port, cxlmd->dev.parent->parent);
> + if (!dport) {
> + dev_err(dev, "CXL port topology %s not found\n",
> + dev_name(&parent_port->dev));
> return -ENXIO;
> }
> + rch = dport->rch;
> +
> + if (!rch) {
> + if (!parent_port->dev.driver) {
> + dev_err(dev, "CXL port topology %s not enabled\n",
> + dev_name(&parent_port->dev));
> + return -ENXIO;
> + }
> + rc = devm_cxl_add_endpoint(&parent_port->dev, cxlmd, dport);
> + if (rc)
> + return rc;
> + }
> + }
>
> - rc = devm_cxl_add_endpoint(endpoint_parent, cxlmd, dport);
> - if (rc)
> - return rc;
> + if (rch) {
> + struct device *uport_dev = parent_port->uport_dev;
> +
> + scoped_guard(device, uport_dev) {
> + if (!uport_dev->driver) {
> + dev_err(dev, "CXL port topology %s not enabled\n",
> + dev_name(uport_dev));
> + return -ENXIO;
> + }
> + /*
> + * Re-fetch dport under uport_dev lock. uport_dev->mutex
> + * is held for the full devres teardown sequence including
> + * free_dport()/kfree(), so this excludes concurrent
> + * hotplug removal through the entire dereference.
> + */
> + dport = cxl_find_dport_by_dev(parent_port, cxlmd->dev.parent->parent);
> + if (!dport) {
> + dev_err(dev, "CXL RCH dport not found\n");
> + return -ENXIO;
> + }
> + rc = devm_cxl_add_endpoint(uport_dev, cxlmd, dport);
> + if (rc)
> + return rc;
> + }
Still reviewing the patch, but thoughts on moving the two new big blocks to a helper function?
DJ
> }
>
> if (cxlmd->attach) {
> diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> index bace662dc988..710a62a66429 100644
> --- a/drivers/cxl/pci.c
> +++ b/drivers/cxl/pci.c
> @@ -708,10 +708,10 @@ static ssize_t rcd_pcie_cap_emit(struct device *dev, u16 offset, char *buf, size
> {
> struct cxl_dev_state *cxlds = dev_get_drvdata(dev);
> struct cxl_memdev *cxlmd = cxlds->cxlmd;
> - struct device *root_dev;
> struct cxl_dport *dport;
> + struct device *root_dev;
> struct cxl_port *root __free(put_cxl_port) =
> - cxl_mem_find_port(cxlmd, &dport);
> + cxl_mem_find_port(cxlmd, NULL);
>
> if (!root)
> return -ENXIO;
> @@ -720,13 +720,20 @@ static ssize_t rcd_pcie_cap_emit(struct device *dev, u16 offset, char *buf, size
> if (!root_dev)
> return -ENXIO;
>
> - if (!dport->regs.rcd_pcie_cap)
> - return -ENXIO;
> -
> guard(device)(root_dev);
> if (!root_dev->driver)
> return -ENXIO;
>
> + /*
> + * Fetch dport under uport_dev lock to protect against concurrent
> + * hotplug removal. uport_dev->mutex is held for the entire devres
> + * teardown sequence including free_dport(), so holding it here
> + * excludes concurrent kfree(dport).
> + */
> + dport = cxl_find_dport_by_dev(root, cxlmd->dev.parent->parent);
> + if (!dport || !dport->regs.rcd_pcie_cap)
> + return -ENXIO;
> +
> switch (width) {
> case 2:
> return sysfs_emit(buf, "%#x\n",