[PATCH v11 1/2] IB/mlx5: Fix transport-domain rollback and initialize lb mutex earlier

From: Prathamesh Deshpande

Date: Tue Apr 14 2026 - 20:21:32 EST


mlx5_ib_alloc_transport_domain() allocates a transport domain (TD) and
then may call mlx5_ib_enable_lb(), which can fail. In that case the error
is returned to the caller without freeing the TD that was just allocated,
so the TD is leaked.

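Fix this by deallocating the transport domain when mlx5_ib_enable_lb()
returns an error. Also return 0 explicitly in the success branch taken
when local loopback control does not apply (the port is not Ethernet, or
neither disable_local_lb_uc nor disable_local_lb_mc is supported), and
move dev->lb.mutex initialization from mlx5_ib_stage_caps_init() to
mlx5_ib_stage_init_init(), where the mutex is now initialized
unconditionally instead of only when those capabilities are present.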

Fixes: 146d2f1af324 ("IB/mlx5: Allocate a Transport Domain for each ucontext")
Signed-off-by: Prathamesh Deshpande <prathameshdeshpande7@xxxxxxxxx>
---
drivers/infiniband/hw/mlx5/main.c | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index e02bfb1479f5..b3b297bc2f2b 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2068,9 +2068,13 @@ static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn,
if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
(!MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) &&
!MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
- return err;
+ return 0;
+
+ err = mlx5_ib_enable_lb(dev, true, false);
+ if (err)
+ mlx5_cmd_dealloc_transport_domain(dev->mdev, *tdn, uid);

- return mlx5_ib_enable_lb(dev, true, false);
+ return err;
}

static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn,
@@ -4486,6 +4490,8 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
dev->port[i].roce.last_port_state = IB_PORT_DOWN;
}

+ mutex_init(&dev->lb.mutex);
+
err = mlx5r_cmd_query_special_mkeys(dev);
if (err)
return err;
@@ -4786,11 +4792,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
if (err)
return err;

- if ((MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
- (MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) ||
- MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
- mutex_init(&dev->lb.mutex);
-
if (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) &
MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q) {
err = mlx5_ib_init_var_region(dev);
--
2.43.0