[PATCH net-next 12/15] net/mlx5: LAG, add MPESW over SD LAG support
From: Tariq Toukan
Date: Thu Jun 04 2026 - 08:02:39 EST
From: Shay Drory <shayd@xxxxxxxxxx>
Enable MPESW LAG creation over SD LAG members, forming a composite LAG
hierarchy. This allows bonding multiple SD groups together under a
single MPESW configuration with shared FDB.
When enabling composite MPESW, the individual SD LAG shared FDB
configurations are temporarily torn down and recreated when the
composite LAG is disabled.
Signed-off-by: Shay Drory <shayd@xxxxxxxxxx>
Reviewed-by: Mark Bloch <mbloch@xxxxxxxxxx>
Signed-off-by: Tariq Toukan <tariqt@xxxxxxxxxx>
---
.../net/ethernet/mellanox/mlx5/core/lag/lag.c | 6 ++
.../net/ethernet/mellanox/mlx5/core/lag/lag.h | 8 ++
.../ethernet/mellanox/mlx5/core/lag/mpesw.c | 95 +++++++++++++++++--
.../ethernet/mellanox/mlx5/core/lag/mpesw.h | 4 +
4 files changed, 105 insertions(+), 8 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index 9566fbf59fdb..25a9012e3014 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -2545,6 +2545,7 @@ void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
struct mlx5_core_dev *primary;
struct mlx5_lag *ldev;
struct lag_func *pf;
+ bool mpesw;
int i;
ldev = mlx5_lag_dev(dev);
@@ -2553,6 +2554,9 @@ void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
primary = mlx5_sd_get_primary(dev) ?: dev;
mlx5_devcom_comp_lock(primary->priv.hca_devcom_comp);
+ mpesw = ldev->mode == MLX5_LAG_MODE_MPESW;
+ if (mpesw)
+ mlx5_mpesw_sd_devcoms_lock(ldev);
mutex_lock(&ldev->lock);
ldev->mode_changes_in_progress++;
@@ -2564,6 +2568,8 @@ void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
}
mutex_unlock(&ldev->lock);
+ if (mpesw)
+ mlx5_mpesw_sd_devcoms_unlock(ldev);
mlx5_devcom_comp_unlock(primary->priv.hca_devcom_comp);
if (!sd_devcom)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
index 34350b0a7307..3a90d360d724 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
@@ -157,6 +157,14 @@ __mlx5_lag_is_sd(struct mlx5_lag *ldev, struct mlx5_core_dev *dev)
return pf && pf->group_id != 0;
}
+static inline bool
+__mlx5_lag_dev_is_port(struct mlx5_lag *ldev, struct mlx5_core_dev *dev)
+{
+ struct lag_func *pf = mlx5_lag_pf_by_dev(ldev, dev);
+
+ return pf && xa_get_mark(&ldev->pfs, pf->idx, MLX5_LAG_XA_MARK_PORT);
+}
+
static inline bool
__mlx5_lag_is_active(struct mlx5_lag *ldev)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
index 2cb44084e239..50bfb450c71e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
@@ -15,7 +15,7 @@ static void mlx5_mpesw_metadata_cleanup(struct mlx5_lag *ldev)
u32 pf_metadata;
int i;
- mlx5_ldev_for_each(i, 0, ldev) {
+ mlx5_lag_for_each(i, 0, ldev, MLX5_LAG_FILTER_ALL) {
dev = mlx5_lag_pf(ldev, i)->dev;
esw = dev->priv.eswitch;
pf_metadata = ldev->lag_mpesw.pf_metadata[i];
@@ -36,7 +36,7 @@ static int mlx5_mpesw_metadata_set(struct mlx5_lag *ldev)
u32 pf_metadata;
int i, err;
- mlx5_ldev_for_each(i, 0, ldev) {
+ mlx5_lag_for_each(i, 0, ldev, MLX5_LAG_FILTER_ALL) {
dev = mlx5_lag_pf(ldev, i)->dev;
esw = dev->priv.eswitch;
pf_metadata = mlx5_esw_match_metadata_alloc(esw);
@@ -52,7 +52,7 @@ static int mlx5_mpesw_metadata_set(struct mlx5_lag *ldev)
goto err_metadata;
}
- mlx5_ldev_for_each(i, 0, ldev) {
+ mlx5_lag_for_each(i, 0, ldev, MLX5_LAG_FILTER_ALL) {
dev = mlx5_lag_pf(ldev, i)->dev;
mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_MULTIPORT_ESW,
(void *)0);
@@ -65,6 +65,48 @@ static int mlx5_mpesw_metadata_set(struct mlx5_lag *ldev)
return err;
}
+static void mlx5_mpesw_restore_sd_fdb(struct mlx5_lag *ldev)
+{
+ struct lag_func *pf;
+ int err, i;
+
+ mlx5_ldev_for_each(i, 0, ldev) {
+ pf = mlx5_lag_pf(ldev, i);
+ err = mlx5_lag_shared_fdb_create(ldev, NULL, 0, pf->group_id);
+ if (err)
+ mlx5_core_warn(pf->dev,
+ "Failed to restore SD shared FDB (%d)\n",
+ err);
+ }
+}
+
+static int mlx5_mpesw_teardown_sd_fdb(struct mlx5_lag *ldev)
+{
+ struct lag_func *pf;
+ int i;
+
+ mlx5_ldev_for_each(i, 0, ldev) {
+ pf = mlx5_lag_pf(ldev, i);
+ if (!pf->sd_fdb_active)
+ continue;
+ mlx5_lag_shared_fdb_destroy(ldev, pf->group_id);
+ }
+ return 0;
+}
+
+static bool mlx5_lag_has_sd_group(struct mlx5_lag *ldev)
+{
+ struct lag_func *pf;
+ int i;
+
+ mlx5_ldev_for_each(i, 0, ldev) {
+ pf = mlx5_lag_pf(ldev, i);
+ if (pf->group_id)
+ return true;
+ }
+ return false;
+}
+
static int mlx5_lag_enable_mpesw(struct mlx5_lag *ldev)
{
int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
@@ -92,10 +134,17 @@ static int mlx5_lag_enable_mpesw(struct mlx5_lag *ldev)
if (err)
return err;
+ if (mlx5_lag_has_sd_group(ldev))
+ mlx5_mpesw_teardown_sd_fdb(ldev);
+
err = mlx5_lag_shared_fdb_create(ldev, NULL, MLX5_LAG_MODE_MPESW,
MLX5_LAG_FILTER_ALL);
if (err) {
- mlx5_core_warn(dev0, "Failed to create LAG in MPESW mode (%d)\n", err);
+ mlx5_core_warn(dev0,
+ "Failed to create LAG in MPESW mode (%d)\n",
+ err);
+ if (mlx5_lag_has_sd_group(ldev))
+ mlx5_mpesw_restore_sd_fdb(ldev);
mlx5_mpesw_metadata_cleanup(ldev);
return err;
}
@@ -105,9 +154,36 @@ static int mlx5_lag_enable_mpesw(struct mlx5_lag *ldev)
void mlx5_lag_disable_mpesw(struct mlx5_lag *ldev)
{
- if (ldev->mode == MLX5_LAG_MODE_MPESW) {
- mlx5_mpesw_metadata_cleanup(ldev);
- mlx5_lag_shared_fdb_destroy(ldev, MLX5_LAG_FILTER_ALL);
+ if (ldev->mode != MLX5_LAG_MODE_MPESW)
+ return;
+
+ mlx5_mpesw_metadata_cleanup(ldev);
+ mlx5_lag_shared_fdb_destroy(ldev, MLX5_LAG_FILTER_ALL);
+ if (mlx5_lag_has_sd_group(ldev))
+ mlx5_mpesw_restore_sd_fdb(ldev);
+}
+
+void mlx5_mpesw_sd_devcoms_lock(struct mlx5_lag *ldev)
+{
+ struct mlx5_devcom_comp_dev *sd_devcom;
+ int i;
+
+ mlx5_ldev_for_each(i, 0, ldev) {
+ sd_devcom = mlx5_sd_get_devcom(mlx5_lag_pf(ldev, i)->dev);
+ if (sd_devcom)
+ mlx5_devcom_comp_lock(sd_devcom);
+ }
+}
+
+void mlx5_mpesw_sd_devcoms_unlock(struct mlx5_lag *ldev)
+{
+ struct mlx5_devcom_comp_dev *sd_devcom;
+ int i;
+
+ mlx5_ldev_for_each_reverse(i, MLX5_MAX_PORTS, 0, ldev) {
+ sd_devcom = mlx5_sd_get_devcom(mlx5_lag_pf(ldev, i)->dev);
+ if (sd_devcom)
+ mlx5_devcom_comp_unlock(sd_devcom);
}
}
@@ -122,6 +198,7 @@ static void mlx5_mpesw_work(struct work_struct *work)
return;
mlx5_devcom_comp_lock(devcom);
+ mlx5_mpesw_sd_devcoms_lock(ldev);
mutex_lock(&ldev->lock);
if (ldev->mode_changes_in_progress) {
mpesww->result = -EAGAIN;
@@ -134,6 +211,7 @@ static void mlx5_mpesw_work(struct work_struct *work)
mlx5_lag_disable_mpesw(ldev);
unlock:
mutex_unlock(&ldev->lock);
+ mlx5_mpesw_sd_devcoms_unlock(ldev);
mlx5_devcom_comp_unlock(devcom);
complete(&mpesww->comp);
}
@@ -199,7 +277,8 @@ bool mlx5_lag_is_mpesw(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev = mlx5_lag_dev(dev);
- return ldev && ldev->mode == MLX5_LAG_MODE_MPESW;
+ return ldev && ldev->mode == MLX5_LAG_MODE_MPESW &&
+ __mlx5_lag_dev_is_port(ldev, dev);
}
EXPORT_SYMBOL(mlx5_lag_is_mpesw);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h
index b767dbb4f457..5099723ba0f7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h
@@ -33,8 +33,12 @@ void mlx5_lag_mpesw_disable(struct mlx5_core_dev *dev);
int mlx5_lag_mpesw_enable(struct mlx5_core_dev *dev);
#ifdef CONFIG_MLX5_ESWITCH
void mlx5_lag_disable_mpesw(struct mlx5_lag *ldev);
+void mlx5_mpesw_sd_devcoms_lock(struct mlx5_lag *ldev);
+void mlx5_mpesw_sd_devcoms_unlock(struct mlx5_lag *ldev);
#else
static inline void mlx5_lag_disable_mpesw(struct mlx5_lag *ldev) {}
+static inline void mlx5_mpesw_sd_devcoms_lock(struct mlx5_lag *ldev) {}
+static inline void mlx5_mpesw_sd_devcoms_unlock(struct mlx5_lag *ldev) {}
#endif /* CONFIG_MLX5_ESWITCH */
#ifdef CONFIG_MLX5_ESWITCH
--
2.44.0