[PATCH v2] thunderbolt: prevent XDomain delayed work use-after-free on disconnect
From: Michael Bommarito
Date: Wed May 27 2026 - 07:48:12 EST
tb_xdp_handle_request() runs on system_wq and queues
xd->state_work via queue_delayed_work() in three request handlers:
PROPERTIES_CHANGED_REQUEST, UUID_REQUEST (via start_handshake),
and LINK_STATE_CHANGE_REQUEST. Similarly, update_xdomain() queues
xd->properties_changed_work when local properties change.
Concurrently, tb_xdomain_remove() calls stop_handshake() which does
cancel_delayed_work_sync() on both delayed works. Later,
tb_xdomain_unregister() calls device_unregister() which eventually
frees the xdomain. Since commit 559c1e1e0134 ("thunderbolt: Run
tb_xdp_handle_request() in system workqueue") moved the request
handler off tb->wq, the handler and the remove path are no longer
serialized. If queue_delayed_work() executes after
cancel_delayed_work_sync() but before the xdomain is freed, the
delayed work fires on a freed object.
Add xd->removing that tb_xdomain_remove() sets under xd->lock
before calling stop_handshake(). Each external queue site holds
the same lock and checks removing before calling
queue_delayed_work(). This provides the mutual exclusion needed:
either the queue site acquires the lock first and queues work that
the subsequent cancel will see, or the remove path acquires the
lock first and the queue site observes removing == true and skips
the queue.
Fixes: 559c1e1e0134 ("thunderbolt: Run tb_xdp_handle_request() in system workqueue")
Cc: stable@xxxxxxxxxxxxxxx
Assisted-by: Claude:claude-opus-4-7
Signed-off-by: Michael Bommarito <michael.bommarito@xxxxxxxxx>
---
v2: Rebased onto thunderbolt.git/next per Mika's request. Verified
the race persists on next: tb_xdp_handle_request still runs on
system_wq, the remove/unregister split does not add
synchronization with the queue sites. Updated commit message to
reflect that tb_xdomain_unregister() now does the
device_unregister (split from tb_xdomain_remove on next).
drivers/thunderbolt/xdomain.c | 41 ++++++++++++++++++++++++++---------
include/linux/thunderbolt.h | 3 +++
2 files changed, 34 insertions(+), 10 deletions(-)
diff --git a/drivers/thunderbolt/xdomain.c b/drivers/thunderbolt/xdomain.c
index 781d88d06b935..fe6c5ac703f4d 100644
--- a/drivers/thunderbolt/xdomain.c
+++ b/drivers/thunderbolt/xdomain.c
@@ -803,9 +803,13 @@ static void tb_xdp_handle_request(struct work_struct *work)
* the xdomain related to this connection as well in
* case there is a change in services it offers.
*/
- if (xd && device_is_registered(&xd->dev))
- queue_delayed_work(tb->wq, &xd->state_work,
- msecs_to_jiffies(XDOMAIN_SHORT_TIMEOUT));
+ if (xd) {
+ mutex_lock(&xd->lock);
+ if (!xd->removing && device_is_registered(&xd->dev))
+ queue_delayed_work(tb->wq, &xd->state_work,
+ msecs_to_jiffies(XDOMAIN_SHORT_TIMEOUT));
+ mutex_unlock(&xd->lock);
+ }
break;
case UUID_REQUEST_OLD:
@@ -818,8 +822,12 @@ static void tb_xdp_handle_request(struct work_struct *work)
* received UUID request from the remote host.
*/
if (!ret && xd && xd->state == XDOMAIN_STATE_ERROR) {
- dev_dbg(&xd->dev, "restarting handshake\n");
- start_handshake(xd);
+ mutex_lock(&xd->lock);
+ if (!xd->removing) {
+ dev_dbg(&xd->dev, "restarting handshake\n");
+ start_handshake(xd);
+ }
+ mutex_unlock(&xd->lock);
}
break;
@@ -885,9 +893,13 @@ static void tb_xdp_handle_request(struct work_struct *work)
ret = tb_xdp_link_state_change_response(ctl, route,
sequence, 0);
- xd->target_link_width = lsc->tlw;
- queue_delayed_work(tb->wq, &xd->state_work,
- msecs_to_jiffies(XDOMAIN_SHORT_TIMEOUT));
+ mutex_lock(&xd->lock);
+ if (!xd->removing) {
+ xd->target_link_width = lsc->tlw;
+ queue_delayed_work(tb->wq, &xd->state_work,
+ msecs_to_jiffies(XDOMAIN_SHORT_TIMEOUT));
+ }
+ mutex_unlock(&xd->lock);
} else {
tb_xdp_error_response(ctl, route, sequence,
ERROR_NOT_READY);
@@ -971,8 +983,12 @@ static int update_xdomain(struct device *dev, void *data)
xd = tb_to_xdomain(dev);
if (xd) {
- queue_delayed_work(xd->tb->wq, &xd->properties_changed_work,
- msecs_to_jiffies(50));
+ mutex_lock(&xd->lock);
+ if (!xd->removing)
+ queue_delayed_work(xd->tb->wq,
+ &xd->properties_changed_work,
+ msecs_to_jiffies(50));
+ mutex_unlock(&xd->lock);
}
return 0;
@@ -2200,6 +2216,11 @@ static int unregister_service(struct device *dev, void *data)
void tb_xdomain_remove(struct tb_xdomain *xd)
{
tb_xdomain_debugfs_remove(xd);
+
+ mutex_lock(&xd->lock);
+ xd->removing = true;
+ mutex_unlock(&xd->lock);
+
stop_handshake(xd);
tb_xdomain_link_exit(xd);
diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h
index b5659f8835171..feb1af175cfde 100644
--- a/include/linux/thunderbolt.h
+++ b/include/linux/thunderbolt.h
@@ -213,6 +213,8 @@ enum tb_link_width {
* @link_width: Width of the downstream facing link
* @link_usb4: Downstream link is USB4
* @is_unplugged: The XDomain is unplugged
+ * @removing: Set by tb_xdomain_remove() under @lock to prevent
+ * concurrent delayed work queueing
* @needs_uuid: If the XDomain does not have @remote_uuid it will be
* queried first
* @service_ids: Used to generate IDs for the services
@@ -262,6 +264,7 @@ struct tb_xdomain {
enum tb_link_width link_width;
bool link_usb4;
bool is_unplugged;
+ bool removing;
bool needs_uuid;
struct ida service_ids;
struct ida in_hopids;
base-commit: d73a08958e66849ea713d2f458b2fcf7b183f987
--
2.53.0