[PATCH net-next V3 14/15] net/mlx5: SD, defer vport metadata init until SD is ready
From: Tariq Toukan
Date: Fri Jun 12 2026 - 07:48:19 EST
From: Shay Drory <shayd@xxxxxxxxxx>
Allow SD devices to transition to switchdev before the SD group is
fully up. Vport metadata allocation depends on mlx5_sd_pf_num_get(),
which returns the correct pf_num only once the SD group is ready, so
defer it from esw_offloads_enable() until SD shared-FDB activation.
Add mlx5_esw_offloads_init_deferred_metadata(), which allocates
per-vport metadata and refreshes the ingress ACLs that were previously
programmed with metadata=0. The helper is idempotent: once the metadata
has been initialized, further calls are no-ops.
Signed-off-by: Shay Drory <shayd@xxxxxxxxxx>
Reviewed-by: Mark Bloch <mbloch@xxxxxxxxxx>
Signed-off-by: Tariq Toukan <tariqt@xxxxxxxxxx>
---
.../net/ethernet/mellanox/mlx5/core/eswitch.h | 1 +
.../mellanox/mlx5/core/eswitch_offloads.c | 79 ++++++++++++++++++-
.../net/ethernet/mellanox/mlx5/core/lib/sd.c | 16 ++++
3 files changed, 93 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index b2b3150f1f04..fea72b1dedab 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -440,6 +440,7 @@ struct mlx5_eswitch {
void esw_offloads_disable(struct mlx5_eswitch *esw);
int esw_offloads_enable(struct mlx5_eswitch *esw);
+int mlx5_esw_offloads_init_deferred_metadata(struct mlx5_eswitch *esw);
void esw_offloads_cleanup(struct mlx5_eswitch *esw);
int esw_offloads_init(struct mlx5_eswitch *esw);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 4dc190a4e7b2..8fa7e633451c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -3675,6 +3675,7 @@ static void esw_offloads_vport_metadata_cleanup(struct mlx5_eswitch *esw,
WARN_ON(vport->metadata != vport->default_metadata);
mlx5_esw_match_metadata_free(esw, vport->default_metadata);
+ vport->default_metadata = 0;
}
static void esw_offloads_metadata_uninit(struct mlx5_eswitch *esw)
@@ -3711,6 +3712,73 @@ static int esw_offloads_metadata_init(struct mlx5_eswitch *esw)
return err;
}
+/* Deferred metadata init for SD devices: allocate vport metadata, then
+ * refresh, under esw->state_lock, the ingress ACLs of the manager vport,
+ * the uplink and every enabled vport; they were created with metadata=0
+ * in esw_create_offloads_acl_tables() / esw_vport_setup(). No rep is
+ * loaded at this point, so no rep netdev exists and rtnl is not needed.
+ * Returns 0 on success or when there is nothing to do (match metadata
+ * disabled or already initialized), an error code otherwise. If an ACL
+ * refresh fails, esw_offloads_metadata_uninit() runs before returning.
+ */
+int mlx5_esw_offloads_init_deferred_metadata(struct mlx5_eswitch *esw)
+{
+ struct mlx5_vport *manager, *vport;
+ unsigned long i;
+ int err;
+
+ if (!mlx5_eswitch_vport_match_metadata_enabled(esw))
+ return 0;
+
+ manager = mlx5_eswitch_get_vport(esw, esw->manager_vport);
+ if (IS_ERR(manager))
+ return PTR_ERR(manager);
+
+ /* Idempotence: non-zero manager default_metadata means init ran */
+ if (manager->default_metadata)
+ return 0;
+
+ err = esw_offloads_metadata_init(esw);
+ if (err)
+ return err;
+
+ mutex_lock(&esw->state_lock);
+ /* Manager has no rep/netdev but its ingress ACL was programmed with
+ * metadata=0: refresh it (TODO confirm: 0 selects default metadata).
+ */
+ err = mlx5_esw_acl_ingress_vport_metadata_update(esw,
+ esw->manager_vport,
+ 0);
+ if (err)
+ goto err_acl;
+
+ /* UPLINK is never marked enabled, so the loop below skips it, but its
+ * ACL is programmed in esw_create_offloads_acl_tables(); refresh it.
+ */
+ err = mlx5_esw_acl_ingress_vport_metadata_update(esw, MLX5_VPORT_UPLINK,
+ 0);
+ if (err)
+ goto err_acl;
+
+ mlx5_esw_for_each_vport(esw, i, vport) {
+ if (!vport || !vport->enabled)
+ continue;
+ err = mlx5_esw_acl_ingress_vport_metadata_update(esw,
+ vport->vport,
+ 0);
+ if (err)
+ goto err_acl;
+ }
+
+ mutex_unlock(&esw->state_lock);
+ return 0;
+
+err_acl:
+ esw_offloads_metadata_uninit(esw);
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
int
esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw,
struct mlx5_vport *vport)
@@ -4072,9 +4140,14 @@ int esw_offloads_enable(struct mlx5_eswitch *esw)
if (err)
goto err_roce;
- err = esw_offloads_metadata_init(esw);
- if (err)
- goto err_metadata;
+ /* SD devices defer metadata init until SD is ready and
+ * mlx5_sd_pf_num_get() can return the correct pf_num.
+ */
+ if (!mlx5_get_sd(esw->dev)) {
+ err = esw_offloads_metadata_init(esw);
+ if (err)
+ goto err_metadata;
+ }
err = esw_set_passing_vport_metadata(esw, true);
if (err)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
index b35795bac098..2fcccd329eb5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
@@ -992,6 +992,7 @@ static bool mlx5_sd_all_paired(struct mlx5_core_dev *primary)
static void mlx5_sd_activate_shared_fdb(struct mlx5_core_dev *primary)
{
struct mlx5_sd *sd = mlx5_get_sd(primary);
+ struct mlx5_core_dev *pos;
struct mlx5_lag *ldev;
struct lag_func *pf;
int err;
@@ -1024,6 +1025,21 @@ static void mlx5_sd_activate_shared_fdb(struct mlx5_core_dev *primary)
goto unlock;
}
+ /* Initialize vport metadata for all group devices. This is deferred
+ * from esw_offloads_enable() because mlx5_sd_pf_num_get() requires
+ * the SD group to be ready.
+ */
+ mlx5_sd_for_each_dev(i, primary, pos) {
+ struct mlx5_eswitch *esw = pos->priv.eswitch;
+
+ err = mlx5_esw_offloads_init_deferred_metadata(esw);
+ if (err) {
+ sd_warn(primary, "Failed to init metadata for %s: %d\n",
+ dev_name(pos->device), err);
+ goto unlock;
+ }
+ }
+
err = mlx5_lag_shared_fdb_create(ldev, NULL, 0, sd->group_id);
if (err)
sd_warn(primary, "Failed to create shared FDB: %d\n", err);
--
2.44.0