Re: [PATCH net] net/mlx5e: Precompute xdpsq assignments for mlx5e_xdp_xmit()
From: Tariq Toukan
Date: Mon Feb 23 2026 - 08:50:39 EST
On 23/02/2026 2:05, Finn Dayton wrote:
mlx5e_xdp_xmit() selects an XDP SQ (Send Queue) using smp_processor_id()
(CPU ID). When doing XDP_REDIRECT from a CPU whose ID is
>= priv->channels.num, mlx5e_xdp_xmit() returns -ENXIO and the redirect fails.
Previous discussion proposed using modulo in mlx5e_xdp_xmit() to map
CPU IDs into the channel range, but modulo/division is too costly in
the hot path.
That discussion reached an agreement to use a while loop with subtraction. It's expected to be fast, and it optimizes the common case.
Instead, this solution precomputes per-cpu priv->xdpsq assignments when
channels are (re)configured and does a single lookup in mlx5e_xdp_xmit().
Because multiple CPUs map to the same xdpsq when CPU count exceeds
channel count, serialize xdp_xmit on the ring with xdp_tx_lock.
What's the advantage of this solution over the one we already agreed to?
Fixes: 58b99ee3e3eb ("net/mlx5e: Add support for XDP_REDIRECT in device-out side")
Link: https://lore.kernel.org/netdev/20251031231038.1092673-1-zijianzhang@xxxxxxxxxxxxx/
Link: https://lore.kernel.org/netdev/44f69955-b566-4fb1-904d-f551046ff2d4@xxxxxxxxx
Cc: stable@xxxxxxxxxxxxxxx # 6.12+
Signed-off-by: Finn Dayton <finnius.dayton@xxxxxxxxxx>
---
Testing:
- XDP forwarding / XDP_REDIRECT verified with both low CPU ids and
CPU ids greater than the number of send queues.
- No -ENXIO observed, successful forwarding.
drivers/net/ethernet/mellanox/mlx5/core/en.h | 4 +++
.../net/ethernet/mellanox/mlx5/core/en/xdp.c | 16 +++++++----
.../net/ethernet/mellanox/mlx5/core/en_main.c | 28 +++++++++++++++++++
3 files changed, 43 insertions(+), 5 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index ea2cd1f5d1d0..387954201640 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -519,6 +519,8 @@ struct mlx5e_xdpsq {
/* control path */
struct mlx5_wq_ctrl wq_ctrl;
struct mlx5e_channel *channel;
+ /* serialize writes by multiple CPUs to this send queue */
+ spinlock_t xdp_tx_lock;
} ____cacheline_aligned_in_smp;
struct mlx5e_xdp_buff {
@@ -909,6 +911,8 @@ struct mlx5e_priv {
struct mlx5e_rq drop_rq;
struct mlx5e_channels channels;
+ /* selects the xdpsq during mlx5e_xdp_xmit() */
+ int __percpu *send_queue_idx_ptr;
struct mlx5e_rx_res *rx_res;
u32 *tx_rates;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index 80f9fc10877a..2dd44ad873a1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -845,7 +845,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5e_xdpsq *sq;
int nxmit = 0;
- int sq_num;
+ int send_queue_idx = 0;
int i;
/* this flag is sufficient, no need to test internal sq state */
@@ -855,13 +855,19 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
return -EINVAL;
- sq_num = smp_processor_id();
- if (unlikely(sq_num >= priv->channels.num))
+ if (unlikely(!priv->send_queue_idx_ptr))
return -ENXIO;
- sq = priv->channels.c[sq_num]->xdpsq;
+ send_queue_idx = *this_cpu_ptr(priv->send_queue_idx_ptr);
+ if (unlikely(send_queue_idx >= priv->channels.num || send_queue_idx < 0))
+ return -ENXIO;
+ sq = priv->channels.c[send_queue_idx]->xdpsq;
+ /* The number of queues configured on a netdev may be smaller than the
+ * CPU pool, so two CPUs might map to this queue. We must serialize writes.
+ */
+ spin_lock(&sq->xdp_tx_lock);
for (i = 0; i < n; i++) {
struct mlx5e_xmit_data_frags xdptxdf = {};
struct xdp_frame *xdpf = frames[i];
@@ -941,7 +947,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
if (flags & XDP_XMIT_FLUSH)
mlx5e_xmit_xdp_doorbell(sq);
-
+ spin_unlock(&sq->xdp_tx_lock);
return nxmit;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 7eb691c2a1bd..adef35d06b89 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1492,6 +1492,7 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
sq->pdev = c->pdev;
sq->mkey_be = c->mkey_be;
sq->channel = c;
+ spin_lock_init(&sq->xdp_tx_lock);
sq->uar_map = c->bfreg->map;
sq->min_inline_mode = params->tx_min_inline_mode;
sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu) - ETH_FCS_LEN;
@@ -3283,10 +3284,30 @@ static void mlx5e_build_txq_maps(struct mlx5e_priv *priv)
smp_wmb();
}
+static void build_priv_to_xdpsq_associations(struct mlx5e_priv *priv)
+{
+ /*
+ * Build the mapping from CPU to XDP send queue index for priv.
+ * This is used by mlx5e_xdp_xmit() to determine which xdpsq (send queue)
+ * should handle the xdptx data, based on the CPU running mlx5e_xdp_xmit()
+ * and the target priv (netdev).
+ */
+ int send_queue_idx, cpu;
+
+ if (unlikely(priv->channels.num == 0))
+ return;
+
+ for_each_possible_cpu(cpu) {
+ send_queue_idx = cpu % priv->channels.num;
+ *per_cpu_ptr(priv->send_queue_idx_ptr, cpu) = send_queue_idx;
+ }
+}
+
void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
{
mlx5e_build_txq_maps(priv);
mlx5e_activate_channels(priv, &priv->channels);
+ build_priv_to_xdpsq_associations(priv);
mlx5e_xdp_tx_enable(priv);
/* dev_watchdog() wants all TX queues to be started when the carrier is
@@ -6263,8 +6284,14 @@ int mlx5e_priv_init(struct mlx5e_priv *priv,
if (!priv->fec_ranges)
goto err_free_channel_stats;
+ priv->send_queue_idx_ptr = alloc_percpu(int);
+ if (!priv->send_queue_idx_ptr)
+ goto err_free_fec_ranges;
+
return 0;
+err_free_fec_ranges:
+ kfree(priv->fec_ranges);
err_free_channel_stats:
kfree(priv->channel_stats);
err_free_tx_rates:
@@ -6295,6 +6322,7 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv)
for (i = 0; i < priv->stats_nch; i++)
kvfree(priv->channel_stats[i]);
kfree(priv->channel_stats);
+ free_percpu(priv->send_queue_idx_ptr);
kfree(priv->tx_rates);
kfree(priv->txq2sq_stats);
kfree(priv->txq2sq);