[PATCH net-next V2 14/15] net/mlx5: SD, defer vport metadata init until SD is ready

From: Tariq Toukan

Date: Mon Jun 08 2026 - 10:12:15 EST


From: Shay Drory <shayd@xxxxxxxxxx>

Allow SD devices to transition to switchdev before the SD group is
fully up. Metadata allocation requires the SD group to be ready, so
defer it from esw_offloads_enable() until SD shared-FDB activation.

Add mlx5_esw_offloads_init_deferred_metadata() which allocates
per-vport metadata and refreshes the manager ingress ACLs that were
previously programmed with metadata=0. The helper is idempotent and
can be called multiple times.

Signed-off-by: Shay Drory <shayd@xxxxxxxxxx>
Reviewed-by: Mark Bloch <mbloch@xxxxxxxxxx>
Signed-off-by: Tariq Toukan <tariqt@xxxxxxxxxx>
---
.../net/ethernet/mellanox/mlx5/core/eswitch.h | 1 +
.../mellanox/mlx5/core/eswitch_offloads.c | 77 ++++++++++++++++++-
.../net/ethernet/mellanox/mlx5/core/lib/sd.c | 16 ++++
3 files changed, 91 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index a5f0774834fe..ecf6a28a1c08 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -440,6 +440,7 @@ struct mlx5_eswitch {

void esw_offloads_disable(struct mlx5_eswitch *esw);
int esw_offloads_enable(struct mlx5_eswitch *esw);
+int mlx5_esw_offloads_init_deferred_metadata(struct mlx5_eswitch *esw);
void esw_offloads_cleanup(struct mlx5_eswitch *esw);
int esw_offloads_init(struct mlx5_eswitch *esw);

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index e87837fbc372..9aec470fe126 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -43,6 +43,7 @@
#include "esw/acl/ofld.h"
#include "rdma.h"
#include "en.h"
+#include "en_rep.h"
#include "fs_core.h"
#include "lib/mlx5.h"
#include "lib/devcom.h"
@@ -3675,6 +3676,7 @@ static void esw_offloads_vport_metadata_cleanup(struct mlx5_eswitch *esw,

WARN_ON(vport->metadata != vport->default_metadata);
mlx5_esw_match_metadata_free(esw, vport->default_metadata);
+ vport->default_metadata = 0;
}

static void esw_offloads_metadata_uninit(struct mlx5_eswitch *esw)
@@ -3711,6 +3713,70 @@ static int esw_offloads_metadata_init(struct mlx5_eswitch *esw)
return err;
}

+/* Deferred metadata init for SD devices: allocate vport metadata, then
+ * refresh the ingress ACLs (manager, uplink, each enabled vport) that
+ * esw_create_offloads_acl_tables() / esw_vport_setup() created with
+ * metadata=0. No rep is loaded at this point, so no rep netdev exists
+ * and the rtnl lock is not needed.
+ *
+ * Return: 0 on success, or if match metadata is disabled or already
+ * initialized; else an error code (ACL failures undo the allocation).
+ */
+int mlx5_esw_offloads_init_deferred_metadata(struct mlx5_eswitch *esw)
+{
+ struct mlx5_vport *manager, *vport;
+ unsigned long i;
+ int err;
+
+ if (!mlx5_eswitch_vport_match_metadata_enabled(esw))
+ return 0;
+
+ manager = mlx5_eswitch_get_vport(esw, esw->manager_vport);
+ if (IS_ERR(manager))
+ return PTR_ERR(manager);
+
+ /* Already initialized by an earlier call - nothing left to do */
+ if (manager->default_metadata)
+ return 0;
+
+ err = esw_offloads_metadata_init(esw);
+ if (err)
+ return err;
+
+ /* The manager vport has no rep/netdev loaded, yet its ingress ACL
+ * was programmed with metadata=0, so it must be refreshed by hand.
+ */
+ err = mlx5_esw_acl_ingress_vport_metadata_update(esw,
+ esw->manager_vport,
+ 0);
+ if (err)
+ goto err_acl;
+
+ /* UPLINK is never marked enabled, so the loop below skips it; its
+ * ACL comes from esw_create_offloads_acl_tables() - refresh it here.
+ */
+ err = mlx5_esw_acl_ingress_vport_metadata_update(esw, MLX5_VPORT_UPLINK,
+ 0);
+ if (err)
+ goto err_acl;
+
+ mlx5_esw_for_each_vport(esw, i, vport) {
+ if (!vport || !vport->enabled)
+ continue;
+ err = mlx5_esw_acl_ingress_vport_metadata_update(esw,
+ vport->vport,
+ 0);
+ if (err)
+ goto err_acl;
+ }
+
+ return 0;
+
+err_acl:
+ esw_offloads_metadata_uninit(esw);
+ return err;
+}
+
int
esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw,
struct mlx5_vport *vport)
@@ -4053,9 +4119,14 @@ int esw_offloads_enable(struct mlx5_eswitch *esw)
if (err)
goto err_roce;

- err = esw_offloads_metadata_init(esw);
- if (err)
- goto err_metadata;
+ /* SD devices defer metadata init until SD is ready and
+ * mlx5_sd_pf_num_get() can return the correct pf_num.
+ */
+ if (!mlx5_get_sd(esw->dev)) {
+ err = esw_offloads_metadata_init(esw);
+ if (err)
+ goto err_metadata;
+ }

err = esw_set_passing_vport_metadata(esw, true);
if (err)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
index 9ff62c134c2a..d74a5a2862cb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
@@ -988,6 +988,7 @@ static bool mlx5_sd_all_paired(struct mlx5_core_dev *primary)
static void mlx5_sd_activate_shared_fdb(struct mlx5_core_dev *primary)
{
struct mlx5_sd *sd = mlx5_get_sd(primary);
+ struct mlx5_core_dev *pos;
struct mlx5_lag *ldev;
struct lag_func *pf;
int err;
@@ -1020,6 +1021,21 @@ static void mlx5_sd_activate_shared_fdb(struct mlx5_core_dev *primary)
goto unlock;
}

+ /* Initialize vport metadata for all group devices. This is deferred
+ * from esw_offloads_enable() because mlx5_sd_pf_num_get() requires
+ * the SD group to be ready.
+ */
+ mlx5_sd_for_each_dev(i, primary, pos) {
+ struct mlx5_eswitch *esw = pos->priv.eswitch;
+
+ err = mlx5_esw_offloads_init_deferred_metadata(esw);
+ if (err) {
+ sd_warn(primary, "Failed to init metadata for %s: %d\n",
+ dev_name(pos->device), err);
+ goto unlock;
+ }
+ }
+
err = mlx5_lag_shared_fdb_create(ldev, NULL, 0, sd->group_id);
if (err)
sd_warn(primary, "Failed to create shared FDB: %d\n", err);
--
2.44.0