[PATCH net-next 12/13] net/mlx5e: TC, enable steering for SD LAG

From: Tariq Toukan

Date: Wed May 27 2026 - 09:08:43 EST


From: Shay Drory <shayd@xxxxxxxxxx>

Enable TC flow steering for SD LAG mode by extending multiport
eligibility checks and peer flow handling.

SD LAG operates similarly to MPESW for TC offloads - flows on
secondary devices need peer flow creation on the primary, and
multiport forwarding rules are eligible when either MPESW or SD LAG
is active.

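Add mlx5_lag_is_sd() helper to query SD LAG mode, and
mlx5_sd_is_primary() to identify the primary device. Make
mlx5_sd_get_primary() non-static; it now returns NULL while the SD
devcom component is not ready. Redirect uplink priv/proto_dev queries
to the primary device's eswitch in SD configurations.

Record the peer-flow index on each peer flow at add time, so that
peer-flow deletion does not depend on the LAG state at del time.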

Signed-off-by: Shay Drory <shayd@xxxxxxxxxx>
Reviewed-by: Mark Bloch <mbloch@xxxxxxxxxx>
Signed-off-by: Tariq Toukan <tariqt@xxxxxxxxxx>
---
.../ethernet/mellanox/mlx5/core/en/tc_priv.h | 4 ++
.../net/ethernet/mellanox/mlx5/core/en_tc.c | 53 +++++++++++++++++--
.../mellanox/mlx5/core/eswitch_offloads.c | 8 +++
.../net/ethernet/mellanox/mlx5/core/lag/lag.c | 14 +++++
.../net/ethernet/mellanox/mlx5/core/lag/lag.h | 1 +
.../net/ethernet/mellanox/mlx5/core/lib/sd.c | 15 +++++-
.../net/ethernet/mellanox/mlx5/core/lib/sd.h | 2 +
7 files changed, 92 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
index a0434ceebe69..28cab4bf525c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
@@ -104,6 +104,10 @@ struct mlx5e_tc_flow {
* due to missing route)
*/
struct list_head peer_flows; /* flows on peer */
+ int peer_index; /* peer-flow index pinned at add time, used at del
+ * time so removal is independent of LAG state
+ * changes between add and del.
+ */
struct net_device *orig_dev; /* netdev adding flow first */
int tmp_entry_index;
struct list_head tmp_list; /* temporary flow list used by neigh update */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 2a16368a948e..910492eb51f2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -71,6 +71,7 @@
#include <asm/div64.h>
#include "lag/lag.h"
#include "lag/mp.h"
+#include "lib/sd.h"

#define MLX5E_TC_TABLE_NUM_GROUPS 4
#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18)
@@ -2132,7 +2133,7 @@ static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow,
mutex_unlock(&esw->offloads.peer_mutex);

list_for_each_entry_safe(peer_flow, tmp, &flow->peer_flows, peer_flows) {
- if (peer_index != mlx5_lag_get_dev_seq(peer_flow->priv->mdev))
+ if (peer_index != peer_flow->peer_index)
continue;

list_del(&peer_flow->peer_flows);
@@ -4196,9 +4197,26 @@ static bool is_lag_dev(struct mlx5e_priv *priv,
same_hw_reps(priv, peer_netdev));
}

+/* True when peer_netdev is a same-HW rep that shares priv's SD primary. */
+static bool is_sd_eligible(struct mlx5e_priv *priv, struct net_device *peer_netdev)
+{
+	struct mlx5_core_dev *primary = mlx5_sd_get_primary(priv->mdev);
+	struct mlx5e_priv *peer_priv = netdev_priv(peer_netdev);
+
+	/* NULL primary: SD devcom not ready, so do not match on NULL == NULL. */
+	return primary && same_hw_reps(priv, peer_netdev) &&
+	       mlx5_lag_is_sd(priv->mdev) &&
+	       primary == mlx5_sd_get_primary(peer_priv->mdev);
+}
+
static bool is_multiport_eligible(struct mlx5e_priv *priv, struct net_device *out_dev)
{
- return same_hw_reps(priv, out_dev) && mlx5_lag_is_mpesw(priv->mdev);
+ struct mlx5_core_dev *primary = mlx5_sd_get_primary(priv->mdev);
+
+ if (!primary)
+ return false;
+
+ return same_hw_reps(priv, out_dev) && mlx5_lag_is_mpesw(primary);
}

bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
@@ -4207,6 +4225,9 @@ bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
if (is_merged_eswitch_vfs(priv, out_dev))
return true;

+ if (is_sd_eligible(priv, out_dev))
+ return true;
+
if (is_multiport_eligible(priv, out_dev))
return true;

@@ -4351,7 +4372,7 @@ static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv,
return &tc->ht;
}

-static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
+static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow, bool *is_sd)
{
struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
struct mlx5_flow_attr *attr = flow->attr;
@@ -4372,6 +4393,13 @@ static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
if (mlx5_lag_is_mpesw(esw_attr->in_mdev))
return true;

+ if (mlx5_lag_is_sd(esw_attr->in_mdev) &&
+ !mlx5_sd_is_primary(esw_attr->in_mdev)) {
+ if (!mlx5_lag_is_mpesw(mlx5_sd_get_primary(esw_attr->in_mdev)))
+ *is_sd = true;
+ return true;
+ }
+
return false;
}

@@ -4609,6 +4637,7 @@ static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
goto out;
}

+ peer_flow->peer_index = i;
list_add_tail(&peer_flow->peer_flows, &flow->peer_flows);
flow_flag_set(flow, DUP);
mutex_lock(&esw->offloads.peer_mutex);
@@ -4628,19 +4657,26 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow **__flow)
{
struct mlx5_devcom_comp_dev *devcom = priv->mdev->priv.eswitch->devcom, *pos;
+ struct netlink_ext_ack *extack = f->common.extack;
struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct mlx5_eswitch_rep *in_rep = rpriv->rep;
struct mlx5_core_dev *in_mdev = priv->mdev;
struct mlx5_eswitch *peer_esw;
struct mlx5e_tc_flow *flow;
+ bool is_sd = false;
int err;

+ if (mlx5_lag_is_sd(in_mdev) && !mlx5_lag_is_active(in_mdev)) {
+ NL_SET_ERR_MSG_MOD(extack, "SD shared FDB not yet active");
+ return -EOPNOTSUPP;
+ }
+
flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
in_mdev);
if (IS_ERR(flow))
return PTR_ERR(flow);

- if (!is_peer_flow_needed(flow)) {
+ if (!is_peer_flow_needed(flow, &is_sd)) {
*__flow = flow;
return 0;
}
@@ -4651,6 +4687,15 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
}

mlx5_devcom_for_each_peer_entry(devcom, peer_esw, pos) {
+ if (is_sd) {
+ /* SD shared FDB: only the matching SD primary. */
+ if (mlx5_sd_get_primary(in_mdev) !=
+ mlx5_sd_get_primary(peer_esw->dev))
+ continue;
+ } else {
+ if (!mlx5_sd_is_primary(peer_esw->dev))
+ continue;
+ }
err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags, peer_esw);
if (err)
goto peer_clean;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index d65f30bb2f80..830fc910a080 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -4690,8 +4690,11 @@ EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_reps_nested);

void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type)
{
+ struct mlx5_core_dev *primary = mlx5_sd_get_primary(esw->dev);
struct mlx5_eswitch_rep *rep;

+ if (primary) /* a NULL primary keeps the caller's esw */
+ esw = primary->priv.eswitch; /* redirect the uplink lookup to the primary's eswitch */
rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
return rep->rep_data[rep_type].priv;
}
@@ -4713,6 +4716,11 @@ EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev);

void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type)
{
+ struct mlx5_core_dev *primary = mlx5_sd_get_primary(esw->dev);
+
+ if (primary) /* a NULL primary keeps the caller's esw */
+ esw = primary->priv.eswitch; /* redirect the uplink query to the primary's eswitch */
+
return mlx5_eswitch_get_proto_dev(esw, MLX5_VPORT_UPLINK, rep_type);
}
EXPORT_SYMBOL(mlx5_eswitch_uplink_get_proto_dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index a2c7e2927431..dd3f18f85466 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -2425,6 +2425,20 @@ bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
}
EXPORT_SYMBOL(mlx5_lag_is_sriov);

+/* Report, under lag_lock, whether @dev's LAG device is in SD mode. */
+bool mlx5_lag_is_sd(struct mlx5_core_dev *dev)
+{
+	unsigned long irq_flags;
+	struct mlx5_lag *lag;
+	bool is_sd;
+
+	spin_lock_irqsave(&lag_lock, irq_flags);
+	lag = mlx5_lag_dev(dev);
+	is_sd = lag ? __mlx5_lag_is_sd(lag, dev) : false;
+	spin_unlock_irqrestore(&lag_lock, irq_flags);
+	return is_sd;
+}
+
bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
index cbe201529661..e412bb85027c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
@@ -202,6 +202,7 @@ static inline bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev)
}
#endif
bool mlx5_lag_check_prereq(struct mlx5_lag *ldev);
+bool mlx5_lag_is_sd(struct mlx5_core_dev *dev);
int mlx5_lag_demux_init(struct mlx5_core_dev *dev,
struct mlx5_flow_table_attr *ft_attr);
void mlx5_lag_demux_cleanup(struct mlx5_core_dev *dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
index ec606851feb8..25286ecd724e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
@@ -49,13 +49,16 @@ static int mlx5_sd_get_host_buses(struct mlx5_core_dev *dev)
return sd->host_buses;
}

-static struct mlx5_core_dev *mlx5_sd_get_primary(struct mlx5_core_dev *dev)
+struct mlx5_core_dev *mlx5_sd_get_primary(struct mlx5_core_dev *dev)
{
struct mlx5_sd *sd = mlx5_get_sd(dev);

if (!sd)
return dev;

+ if (!mlx5_devcom_comp_is_ready(sd->devcom))
+ return NULL; /* SD devcom not ready: callers must handle NULL */
+
return sd->primary ? dev : sd->primary_dev;
}

@@ -69,6 +72,16 @@ struct mlx5_devcom_comp_dev *mlx5_sd_get_devcom(struct mlx5_core_dev *dev)
return sd->devcom;
}

+/* A device without SD context counts as primary; otherwise report the
+ * SD context's primary flag.
+ */
+bool mlx5_sd_is_primary(struct mlx5_core_dev *dev)
+{
+	struct mlx5_sd *sd_ctx = mlx5_get_sd(dev);
+
+	return !sd_ctx || sd_ctx->primary;
+}
+
struct mlx5_core_dev *
mlx5_sd_primary_get_peer(struct mlx5_core_dev *primary, int idx)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h
index bf59903ab23f..011702ff6f02 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h
@@ -10,6 +10,8 @@

struct mlx5_sd;

+struct mlx5_core_dev *mlx5_sd_get_primary(struct mlx5_core_dev *dev);
+bool mlx5_sd_is_primary(struct mlx5_core_dev *dev);
struct mlx5_core_dev *mlx5_sd_primary_get_peer(struct mlx5_core_dev *primary, int idx);
int mlx5_sd_ch_ix_get_dev_ix(struct mlx5_core_dev *dev, int ch_ix);
int mlx5_sd_ch_ix_get_vec_ix(struct mlx5_core_dev *dev, int ch_ix);
--
2.44.0