[PATCH] driver core: Avoid race between bus_add_device() and bus_add_driver()

From: Petr Malat
Date: Sun Feb 04 2024 - 05:53:52 EST


device_add() adds the device to the bus list in bus_add_device(), but
until fw_devlink_link_device() is called, the device must not be probed,
because devlinks created after the device is bound would be stuck in
activating state. Also add and bind uevents could be generated in a wrong
order.

bus_add_driver() lacks any synchronization, which would prevent it from
picking the device and trying to probe it once it's on the bus list.

Straightforward fix could lock the device before bus_add_device() and
unlock it when probing is safe, but this would affect all bus match
callbacks and bus_notifier callbacks, which would suddenly receive
locked device.

Instead, introduce two state booleans in device structure to track the
device adding progress:
- probe_ready starts cleared to 0 and is set to 1 in device_add() once
the device is ready to be probed.
- probe_pending is set to 1 by bus_add_device() before the device is
added to the bus list if drivers_autoprobe is enabled. It's cleared by
__device__attach() called by device_initial_probe().
When bus_add_driver() executes and sees probe_pending is set, it skips
the probe because it knows device_add() will call it when the device is
ready and it will find the driver on the bus list. If both probe_pending
and probe_ready are 0, bus_add_driver() must wait for probe_ready being
set. This can happen if drivers_autoprobe is changed at runtime.

Perform the synchronization before calling bus match callback to be
sure devices are always at the same state of initialization independently
of the match being called from device_add() or bus_add_driver() code path.

Signed-off-by: Petr Malat <oss@xxxxxxxxx>
---
drivers/base/base.h | 1 +
drivers/base/bus.c | 3 ++-
drivers/base/core.c | 2 ++
drivers/base/dd.c | 34 ++++++++++++++++++++++++++++++++++
include/linux/device.h | 8 ++++++++
5 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/drivers/base/base.h b/drivers/base/base.h
index eb4c0ace9242..f11d102548ef 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -186,6 +186,7 @@ void deferred_probe_extend_timeout(void);
void driver_deferred_probe_trigger(void);
const char *device_get_devnode(const struct device *dev, umode_t *mode,
kuid_t *uid, kgid_t *gid, const char **tmp);
+void device_set_probe_ready(struct device *dev);

/* /sys/devices directory */
extern struct kset *devices_kset;
diff --git a/drivers/base/bus.c b/drivers/base/bus.c
index daee55c9b2d9..78795f168d62 100644
--- a/drivers/base/bus.c
+++ b/drivers/base/bus.c
@@ -503,6 +503,7 @@ int bus_add_device(struct device *dev)
goto out_subsys;

klist_add_tail(&dev->p->knode_bus, &sp->klist_devices);
+ dev->probe_pending = sp->drivers_autoprobe;
return 0;

out_subsys:
@@ -528,7 +529,7 @@ void bus_probe_device(struct device *dev)
if (!sp)
return;

- if (sp->drivers_autoprobe)
+ if (sp->drivers_autoprobe || dev->probe_pending)
device_initial_probe(dev);

mutex_lock(&sp->mutex);
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 14d46af40f9a..896f8fec4a71 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -3623,6 +3623,8 @@ int device_add(struct device *dev)
}

bus_probe_device(dev);
+ /* Now it's safe for drivers to probe it as well */
+ device_set_probe_ready(dev);

/*
* If all driver registration is done and a newly added device doesn't
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 85152537dbf1..2004280d7a2e 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -997,12 +997,27 @@ static void __device_attach_async_helper(void *_dev, async_cookie_t cookie)
put_device(dev);
}

+/*
+ * To synchronize __device_attach() and __driver_attach(). It's rarely needed,
+ * we can use one instance for all devices and drivers.
+ */
+static DECLARE_WAIT_QUEUE_HEAD(probe_ready_waitqueue);
+
+void device_set_probe_ready(struct device *dev)
+{
+ device_lock(dev);
+ dev->probe_ready = 1;
+ device_unlock(dev);
+ wake_up_all(&probe_ready_waitqueue);
+}
+
static int __device_attach(struct device *dev, bool allow_async)
{
int ret = 0;
bool async = false;

device_lock(dev);
+ dev->probe_pending = 0;
if (dev->p->dead) {
goto out_unlock;
} else if (dev->driver) {
@@ -1159,6 +1174,25 @@ static int __driver_attach(struct device *dev, void *data)
bool async = false;
int ret;

+ /*
+ * Driver is already on the bus drivers list and device_add() hasn't
+ * probed yet. Return, device_add will probe the driver.
+ */
+ if (unlikely(dev->probe_pending)) {
+ device_lock(dev);
+ ret = dev->probe_pending;
+ device_unlock(dev);
+ if (ret)
+ return 0;
+ }
+
+ /*
+ * Device add is running in parallel, we must wait before executing
+ * driver_match_device(), to be sure all data match relies on are there.
+ */
+ if (unlikely(!dev->probe_ready))
+ wait_event(probe_ready_waitqueue, dev->probe_ready);
+
/*
* Lock device and try to bind to it. We drop the error
* here and always return 0, because we need to keep trying
diff --git a/include/linux/device.h b/include/linux/device.h
index 97c4b046c09d..a2c07d57cd85 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -691,6 +691,12 @@ struct device_physical_location {
* and optionall (if the coherent mask is large enough) also
* for dma allocations. This flag is managed by the dma ops
* instance from ->dma_supported.
+ * @probe_ready: Set from device_add() under device lock when the device is
+ * ready to be probed by a driver to prevent the driver from
+ * probing the device too early.
+ * @probe_pending: Set by device_add() before adding the device to the bus list,
+ * if it knows, it will call initial probe. Cleared by
+ * __device_attach() under the device lock.
*
* At the lowest level, every device in a Linux system is represented by an
* instance of struct device. The device structure contains the information
@@ -803,6 +809,8 @@ struct device {
#ifdef CONFIG_DMA_OPS_BYPASS
bool dma_ops_bypass : 1;
#endif
+ bool probe_ready:1;
+ bool probe_pending:1;
};

/**
--
2.39.2