Re: [PATCH v6 6/9] cxl/pci: Track memdevs affected by CXL reset

From: Cheatham, Benjamin

Date: Tue Jun 02 2026 - 16:37:48 EST


On 5/28/2026 3:31 AM, Srirangan Madhavan wrote:
> CXL reset is scoped to the CXL.cache/mem function set, so reset
> orchestration needs to account for the target memdev and any affected
> sibling-function memdevs.

I would move this patch to right after 4/9, since it does the back
half of finding the devices affected by reset.
>
> Add reset context tracking for affected memdevs. Collect the memdevs
> associated with the target and sibling PCI functions, track which ones
> are active, collect their regions, and provide helpers to lock and
> revalidate the active memdevs before reset proceeds.
>
> The reset orchestration and CXL.mem restore flow are added separately.
>
> Signed-off-by: Srirangan Madhavan <smadhavan@xxxxxxxxxx>
> ---
> drivers/cxl/core/pci.c | 176 +++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 176 insertions(+)
>
> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
> index 1dd880f5a333..c755c18c8d84 100644
> --- a/drivers/cxl/core/pci.c
> +++ b/drivers/cxl/core/pci.c
> @@ -1106,8 +1106,17 @@ cxl_reset_flush_cpu_caches(struct cxl_reset_region_context *ctx)
> return 0;
> }
>
> +struct cxl_reset_memdev {
> + struct cxl_memdev *cxlmd;
> + bool active;
> + bool locked;
> +};
> +
> struct cxl_reset_context {
> struct pci_dev *target;
> + struct cxl_reset_memdev *memdevs;
> + int nr_memdevs;
> + int memdev_capacity;
> struct pci_dev **siblings;
> int nr_siblings;
> int sibling_capacity;
> @@ -1237,6 +1246,173 @@ static int cxl_reset_collect_siblings(struct cxl_reset_context *ctx)
> return wctx.rc;
> }
>
> +static int cxl_reset_match_memdev_by_parent(struct device *dev,
> + const void *parent)
> +{
> + return is_cxl_memdev(dev) && dev->parent == parent;
> +}
> +
> +static bool cxl_reset_memdev_active(struct cxl_memdev *cxlmd)
> +{
> + return cxlmd->dev.driver && cxlmd->endpoint &&
> + !IS_ERR(cxlmd->endpoint);

You can replace the last two checks with !IS_ERR_OR_NULL(cxlmd->endpoint).
> +}
> +
> +static int cxl_reset_collect_pci_memdev(struct cxl_reset_context *ctx,
> + struct pci_dev *pdev)
> +{
> + struct cxl_reset_memdev *memdevs;
> + struct cxl_memdev *cxlmd;
> + struct device *dev;
> + int capacity, i;
> +
> + dev = bus_find_device(&cxl_bus_type, NULL, &pdev->dev,
> + cxl_reset_match_memdev_by_parent);
> + if (!dev)
> + return 0;
> +
> + cxlmd = to_cxl_memdev(dev);
> + for (i = 0; i < ctx->nr_memdevs; i++) {
> + if (ctx->memdevs[i].cxlmd == cxlmd) {
> + put_device(dev);
> + return 0;
> + }
> + }
> +
> + if (ctx->nr_memdevs < ctx->memdev_capacity)
> + goto add;

The goto here isn't great; I'd just add the memdev inside the if statement
and return here.
> +
> + capacity = ctx->memdev_capacity ? ctx->memdev_capacity * 2 :
> + CXL_RESET_SIBLINGS_INIT;
> + memdevs = krealloc(ctx->memdevs, capacity * sizeof(*memdevs),
> + GFP_KERNEL);
> + if (!memdevs) {
> + put_device(dev);

Should you null out ctx->memdevs here? Is it possible for it to hold a stale value at this
point?
> + return -ENOMEM;
> + }
> +
> + ctx->memdevs = memdevs;
> + ctx->memdev_capacity = capacity;
> +
> +add:
> + ctx->memdevs[ctx->nr_memdevs++] = (struct cxl_reset_memdev) {
> + .cxlmd = cxlmd,
> + };
> + return 0;
> +}
> +
> +/*
> + * CXL Reset is device scoped for CXL.cache/mem. Use the affected PCI
> + * function set to find memdevs whose regions and endpoint decoder state must
> + * be handled around the reset.
> + */
> +static int __maybe_unused cxl_reset_collect_memdevs(struct cxl_reset_context *ctx)
> +{
> + int rc, i;
> +
> + rc = cxl_reset_collect_pci_memdev(ctx, ctx->target);
> + if (rc)
> + return rc;
> +
> + for (i = 0; i < ctx->nr_siblings; i++) {
> + rc = cxl_reset_collect_pci_memdev(ctx, ctx->siblings[i]);
> + if (rc)
> + return rc;
> + }
> +
> + return 0;
> +}
> +
> +static int __maybe_unused
> +cxl_reset_collect_regions(struct cxl_reset_context *ctx,
> + struct cxl_reset_region_context *region_ctx)
> +{
> + int rc, i;
> +
> + lockdep_assert_held_write(&cxl_rwsem.region);
> +
> + for (i = 0; i < ctx->nr_memdevs; i++) {
> + struct cxl_reset_memdev *rmd = &ctx->memdevs[i];
> + struct cxl_memdev *cxlmd = rmd->cxlmd;
> +
> + if (!device_trylock(&cxlmd->dev))
> + return -EAGAIN;

Use ACQUIRE() here.

> +
> + if (cxl_reset_memdev_active(cxlmd)) {
> + rc = cxl_reset_collect_memdev_regions(region_ctx,
> + cxlmd);
> + if (!rc)
> + rmd->active = true;
> + } else {
> + rc = 0;
> + }
> +
> + device_unlock(&cxlmd->dev);
> + if (rc)
> + return rc;
> + }
> +
> + return 0;
> +}
> +
> +static void cxl_reset_unlock_memdevs(struct cxl_reset_context *ctx)
> +{
> + int i;
> +
> + for (i = ctx->nr_memdevs - 1; i >= 0; i--) {
> + struct cxl_reset_memdev *rmd = &ctx->memdevs[i];
> +
> + if (!rmd->locked)
> + continue;
> +
> + device_unlock(&rmd->cxlmd->dev);
> + rmd->locked = false;
> + }
> +}
> +
> +static int __maybe_unused cxl_reset_lock_memdevs(struct cxl_reset_context *ctx)
> +{
> + int i;
> +
> + lockdep_assert_held_write(&cxl_rwsem.region);
> +
> + for (i = 0; i < ctx->nr_memdevs; i++) {
> + struct cxl_reset_memdev *rmd = &ctx->memdevs[i];
> + struct cxl_memdev *cxlmd = rmd->cxlmd;
> +
> + if (!rmd->active)
> + continue;
> +
> + if (!device_trylock(&cxlmd->dev))
> + goto err;
> +
> + rmd->locked = true;
> + if (!cxl_reset_memdev_active(cxlmd)) {
> + cxl_reset_unlock_memdevs(ctx);
> + return -ENODEV;
> + }
> + }
> +
> + return 0;
> +
> +err:
> + cxl_reset_unlock_memdevs(ctx);
> + return -EAGAIN;
> +}
> +
> +static void __maybe_unused cxl_reset_put_memdevs(struct cxl_reset_context *ctx)
> +{
> + int i;
> +
> + for (i = 0; i < ctx->nr_memdevs; i++)
> + put_device(&ctx->memdevs[i].cxlmd->dev);
> +
> + kfree(ctx->memdevs);
> + ctx->memdevs = NULL;
> + ctx->nr_memdevs = 0;
> + ctx->memdev_capacity = 0;
> +}
> +
> static void cxl_pci_functions_reset_done(struct cxl_reset_context *ctx)
> {
> int i;