[PATCH v3 1/2] cxl/region: serialize devm action removal via scheduled work
From: Sungwoo Kim
Date: Sun Apr 26 2026 - 23:26:56 EST
devm_remove_action() must be called (1) only once and (2) only if the
device is still bound to a driver. However, several race conditions
allow multiple calls to devm_remove_action().
For example, delete_region_store() and devres_release_all() can race [1]:
CPU0                                              CPU1
devres_release_all()
  // take devres_lock
  remove_nodes(devres_head) // mv to local todo
  // drop devres_lock                             delete_region_store()
                                                    cxlr = cxl_find_region_by_name() // success
                                                    devm_release_action(unregister_region)
                                                      devres_release()
                                                        devres_remove()
                                                          // hold devres_lock
                                                          find_dr(devres_head) // does not find it
                                                      WARN_ON(-ENOENT)
  release_nodes() // drain todo
    unregister_region(cxlr) // release() cb
      device_del()
To prevent this, introduce a new function, remove_devm_actions(), that
defers removal of the unregister_region devm action to a work item.
The work item guarantees that devm_remove_action() is called only once
by guarding it with the CXL_REGION_F_DEVM_REMOVE flag. It also checks
that the device is still bound to a driver before calling
devm_remove_action().
Checking the binding requires holding the device lock. To make that
possible, Dan suggested [2] using a workqueue: a work item starts with
no locks held, so it can safely acquire the device lock.
[1] https://lore.kernel.org/linux-cxl/20260310183644.4rwc7ilmzy4t5xp6@offworld/
[2] https://lore.kernel.org/linux-cxl/69b0a0f8bfb0b_213210026@dwillia2-mobl4.notmuch/
Suggested-by: Dan Williams <djbw@xxxxxxxxxx>
Signed-off-by: Sungwoo Kim <iam@xxxxxxxxxxxx>
---
drivers/cxl/core/port.c | 6 ++++++
drivers/cxl/core/region.c | 27 +++++++++++++++++++++++++++
drivers/cxl/cxl.h | 9 +++++++++
3 files changed, 42 insertions(+)
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index c5aacd7054f1..2f142cea7f26 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -2305,6 +2305,12 @@ bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd)
}
EXPORT_SYMBOL_NS_GPL(schedule_cxl_memdev_detach, "CXL");
+bool schedule_cxl_region_remove_devm_actions(struct cxl_region *cxlr)
+{
+ return queue_work(cxl_bus_wq, &cxlr->remove_work);
+}
+EXPORT_SYMBOL_NS_GPL(schedule_cxl_region_remove_devm_actions, "CXL");
+
static void add_latency(struct access_coordinate *c, long latency)
{
for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index e50dc716d4e8..b086ae88b5bb 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -39,6 +39,7 @@
static nodemask_t nodemask_region_seen = NODE_MASK_NONE;
static struct cxl_region *to_cxl_region(struct device *dev);
+static void remove_devm_actions_work(struct work_struct *work);
#define __ACCESS_ATTR_RO(_level, _name) { \
.attr = { .name = __stringify(_name), .mode = 0444 }, \
@@ -2589,6 +2590,8 @@ static struct cxl_region *cxl_region_alloc(struct cxl_root_decoder *cxlrd, int i
dev->type = &cxl_region_type;
cxl_region_setup_flags(cxlr, &cxlrd->cxlsd.cxld);
+ INIT_WORK(&cxlr->remove_work, remove_devm_actions_work);
+
return cxlr;
}
@@ -2831,6 +2834,30 @@ cxl_find_region_by_name(struct cxl_root_decoder *cxlrd, const char *name)
return to_cxl_region(region_dev);
}
+static bool remove_devm_actions(struct cxl_region *cxlr)
+{
+ return schedule_cxl_region_remove_devm_actions(cxlr);
+}
+
+static void remove_devm_actions_work(struct work_struct *work)
+{
+ struct cxl_region *cxlr = container_of(work, typeof(*cxlr), remove_work);
+ struct cxl_root_decoder *cxlrd = cxlr->cxlrd;
+ struct cxl_port *port = to_cxl_port(cxlrd->cxlsd.cxld.dev.parent);
+
+ if (test_and_set_bit(CXL_REGION_F_DEVM_REMOVE, &cxlr->flags)) {
+ put_device(&cxlr->dev);
+ return;
+ }
+
+ scoped_guard(device, port->uport_dev) {
+ if (port->uport_dev->driver)
+ devm_remove_action(port->uport_dev, unregister_region, cxlr);
+ }
+
+ put_device(&cxlr->dev);
+}
+
static ssize_t delete_region_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t len)
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 1297594beaec..31ca4e6676ed 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -447,6 +447,12 @@ struct cxl_region_params {
*/
#define CXL_REGION_F_NORMALIZED_ADDRESSING 3
+/* Indicate that this region is being unregistered to prevent a race. */
+#define CXL_REGION_F_UNREGISTER 4
+
+/* Indicate that this region called devm_remove_action. */
+#define CXL_REGION_F_DEVM_REMOVE 5
+
/**
* struct cxl_region - CXL region
* @dev: This region's device
@@ -462,6 +468,7 @@ struct cxl_region_params {
* @coord: QoS access coordinates for the region
* @node_notifier: notifier for setting the access coordinates to node
* @adist_notifier: notifier for calculating the abstract distance of node
+ * @remove_work: trigger the remove action in a safe context to acquire locks
*/
struct cxl_region {
struct device dev;
@@ -477,6 +484,7 @@ struct cxl_region {
struct access_coordinate coord[ACCESS_COORDINATE_MAX];
struct notifier_block node_notifier;
struct notifier_block adist_notifier;
+ struct work_struct remove_work;
};
struct cxl_nvdimm_bridge {
@@ -733,6 +741,7 @@ struct cxl_port *cxl_pci_find_port(struct pci_dev *pdev,
struct cxl_port *cxl_mem_find_port(struct cxl_memdev *cxlmd,
struct cxl_dport **dport);
bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd);
+bool schedule_cxl_region_remove_devm_actions(struct cxl_region *cxlr);
struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port,
struct device *dport, int port_id,
--
2.47.3