[PATCH] netdevsim: Fix hung task by releasing bus lock before device ops
From: Jiakai Xu
Date: Sat May 09 2026 - 05:29:23 EST
The new_device_store() and del_device_store() sysfs handlers hold
nsim_bus_dev_list_lock across the device_register() and
device_unregister() calls, which in turn acquire rtnl_lock and
devl_lock. As a result, nsim_bus_dev_list_lock can be held for a long
time: while one thread holds it and waits for rtnl_lock (acquired
during probe), every other thread calling new_device_store() or
del_device_store() is blocked on nsim_bus_dev_list_lock, and threads
waiting for rtnl_lock are blocked as well.
Fix this by:
1. Moving nsim_bus_dev_new() (which calls device_register()) outside
the nsim_bus_dev_list_lock critical section in new_device_store().
2. Releasing nsim_bus_dev_list_lock before calling nsim_bus_dev_del()
(which calls device_unregister()) in del_device_store().
3. Moving refcount_inc(&nsim_bus_devs) into nsim_bus_dev_new(), before
device_register(), so that the refcount accounts for the device even
if the bus is being torn down concurrently.
Signed-off-by: Jiakai Xu <xujiakai24@xxxxxxxxxxxxxxxx>
---
drivers/net/netdevsim/bus.c | 28 ++++++++++++++--------------
1 file changed, 14 insertions(+), 14 deletions(-)
diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c
index 41483e371f05..0e15c8605997 100644
--- a/drivers/net/netdevsim/bus.c
+++ b/drivers/net/netdevsim/bus.c
@@ -181,20 +181,18 @@ new_device_store(const struct bus_type *bus, const char *buf, size_t count)
return -EINVAL;
}
+ nsim_bus_dev = nsim_bus_dev_new(id, port_count, num_queues);
+ if (IS_ERR(nsim_bus_dev))
+ return PTR_ERR(nsim_bus_dev);
+
mutex_lock(&nsim_bus_dev_list_lock);
/* Prevent to use resource before initialization. */
if (!smp_load_acquire(&nsim_bus_enable)) {
- err = -EBUSY;
- goto err;
- }
-
- nsim_bus_dev = nsim_bus_dev_new(id, port_count, num_queues);
- if (IS_ERR(nsim_bus_dev)) {
- err = PTR_ERR(nsim_bus_dev);
- goto err;
+ mutex_unlock(&nsim_bus_dev_list_lock);
+ nsim_bus_dev_del(nsim_bus_dev);
+ return -EBUSY;
}
- refcount_inc(&nsim_bus_devs);
/* Allow using nsim_bus_dev */
smp_store_release(&nsim_bus_dev->init, true);
@@ -202,9 +200,6 @@ new_device_store(const struct bus_type *bus, const char *buf, size_t count)
mutex_unlock(&nsim_bus_dev_list_lock);
return count;
-err:
- mutex_unlock(&nsim_bus_dev_list_lock);
- return err;
}
static BUS_ATTR_WO(new_device);
@@ -241,9 +236,9 @@ del_device_store(const struct bus_type *bus, const char *buf, size_t count)
if (nsim_bus_dev->dev.id != id)
continue;
list_del(&nsim_bus_dev->list);
+ mutex_unlock(&nsim_bus_dev_list_lock);
nsim_bus_dev_del(nsim_bus_dev);
- err = 0;
- break;
+ return count;
}
mutex_unlock(&nsim_bus_dev_list_lock);
return !err ? count : err;
@@ -468,6 +463,11 @@ nsim_bus_dev_new(unsigned int id, unsigned int port_count, unsigned int num_queu
/* Disallow using nsim_bus_dev */
smp_store_release(&nsim_bus_dev->init, false);
+ /* Increment refcount before device_register() so that the device
+ * is accounted for even if the bus is being torn down concurrently.
+ */
+ refcount_inc(&nsim_bus_devs);
+
err = device_register(&nsim_bus_dev->dev);
if (err)
goto err_nsim_bus_dev_id_free;
--
2.34.1