[PATCH net 2/4] net/mlx5e: Fix HV VHCA stats agent registration race
From: Tariq Toukan
Date: Thu Jun 04 2026 - 09:57:58 EST
From: Feng Liu <feliu@xxxxxxxxxx>
mlx5e_hv_vhca_stats_create() registers the stats agent through
mlx5_hv_vhca_agent_create(). The helper publishes the agent in
hv_vhca->agents[type] under agents_lock and immediately schedules an
asynchronous control invalidation on the HV VHCA workqueue before
returning to mlx5e.
The asynchronous invalidation invokes the control agent's invalidate
callback, which reads the hypervisor control block and forwards the
command to mlx5e_hv_vhca_stats_control(). That callback may either:
- call cancel_delayed_work_sync(&priv->stats_agent.work), or
- call queue_delayed_work(priv->wq, &sagent->work, sagent->delay).
However, the delayed_work and priv->stats_agent.agent are only
initialized after mlx5_hv_vhca_agent_create() returns to mlx5e:
agent = mlx5_hv_vhca_agent_create(...); /* publish + invalidate */
...
priv->stats_agent.agent = agent; /* too late */
INIT_DELAYED_WORK(&priv->stats_agent.work, ...); /* too late */
If the asynchronous control path runs before the two assignments
above, it can:
- Operate on an uninitialized delayed_work whose timer.function is
NULL. queue_delayed_work() calls add_timer() unconditionally, so
when the timer expires the timer softirq invokes a NULL function
pointer.
- Re-initialize the timer later through INIT_DELAYED_WORK() while
the timer is already enqueued in the timer wheel, corrupting the
hlist (entry.pprev cleared while the previous bucket node still
points at this entry).
- When the worker eventually runs, mlx5e_hv_vhca_stats_work() reads
sagent->agent (NULL) and dereferences it inside
mlx5_hv_vhca_agent_write().
Fix this by:
- Initializing priv->stats_agent.work before invoking
mlx5_hv_vhca_agent_create(), so the work is always in a valid
state when the control callback observes it.
- Adding a struct mlx5_hv_vhca_agent **ctx_update out-parameter
to mlx5_hv_vhca_agent_create(). The helper writes the agent
pointer to *ctx_update before publishing into hv_vhca->agents[]
and triggering the agents_update flow, so any callback
subsequently invoked from that flow already sees a valid
priv->stats_agent.agent. This avoids having the control
callback participate in agent initialization.
While at it, clear priv->stats_agent.{agent,buf} after teardown and
on the agent_create() failure path. Without this, an enable/disable
cycle hitting an early-return in create can lead to a UAF or
double-destroy of stale pointers from the previous cycle.
Fixes: cef35af34d6d ("net/mlx5e: Add mlx5e HV VHCA stats agent")
Signed-off-by: Feng Liu <feliu@xxxxxxxxxx>
Reviewed-by: Eran Ben Elisha <eranbe@xxxxxxxxxx>
Signed-off-by: Tariq Toukan <tariqt@xxxxxxxxxx>
---
.../mellanox/mlx5/core/en/hv_vhca_stats.c | 22 ++++++++++++-------
.../ethernet/mellanox/mlx5/core/lib/hv_vhca.c | 8 +++++--
.../ethernet/mellanox/mlx5/core/lib/hv_vhca.h | 6 +++--
3 files changed, 24 insertions(+), 12 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
index 06cbd49d4e98..2e495442a547 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
@@ -73,7 +73,7 @@ static void mlx5e_hv_vhca_stats_work(struct work_struct *work)
sagent = container_of(dwork, struct mlx5e_hv_vhca_stats_agent, work);
priv = container_of(sagent, struct mlx5e_priv, stats_agent);
buf_len = mlx5e_hv_vhca_stats_buf_size(priv);
- agent = sagent->agent;
+ agent = READ_ONCE(sagent->agent);
buf = sagent->buf;
memset(buf, 0, buf_len);
@@ -135,11 +135,14 @@ void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
if (!priv->stats_agent.buf)
return;
+ INIT_DELAYED_WORK(&priv->stats_agent.work, mlx5e_hv_vhca_stats_work);
+
agent = mlx5_hv_vhca_agent_create(priv->mdev->hv_vhca,
MLX5_HV_VHCA_AGENT_STATS,
mlx5e_hv_vhca_stats_control, NULL,
mlx5e_hv_vhca_stats_cleanup,
- priv);
+ priv,
+ &priv->stats_agent.agent);
if (IS_ERR_OR_NULL(agent)) {
if (IS_ERR(agent))
@@ -148,18 +151,21 @@ void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
agent);
kvfree(priv->stats_agent.buf);
- return;
+ priv->stats_agent.buf = NULL;
}
-
- priv->stats_agent.agent = agent;
- INIT_DELAYED_WORK(&priv->stats_agent.work, mlx5e_hv_vhca_stats_work);
}
void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv)
{
- if (IS_ERR_OR_NULL(priv->stats_agent.agent))
+ struct mlx5_hv_vhca_agent *agent;
+
+ agent = READ_ONCE(priv->stats_agent.agent);
+ if (IS_ERR_OR_NULL(agent))
return;
- mlx5_hv_vhca_agent_destroy(priv->stats_agent.agent);
+ mlx5_hv_vhca_agent_destroy(agent);
kvfree(priv->stats_agent.buf);
+
+ WRITE_ONCE(priv->stats_agent.agent, NULL);
+ priv->stats_agent.buf = NULL;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
index d6dc7bce855e..305752dab7bd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
@@ -190,7 +190,7 @@ mlx5_hv_vhca_control_agent_create(struct mlx5_hv_vhca *hv_vhca)
return mlx5_hv_vhca_agent_create(hv_vhca, MLX5_HV_VHCA_AGENT_CONTROL,
NULL,
mlx5_hv_vhca_control_agent_invalidate,
- NULL, NULL);
+ NULL, NULL, NULL);
}
static void mlx5_hv_vhca_control_agent_destroy(struct mlx5_hv_vhca_agent *agent)
@@ -256,7 +256,8 @@ mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
void (*invalidate)(struct mlx5_hv_vhca_agent*,
u64 block_mask),
void (*cleaup)(struct mlx5_hv_vhca_agent *agent),
- void *priv)
+ void *priv,
+ struct mlx5_hv_vhca_agent **ctx_update)
{
struct mlx5_hv_vhca_agent *agent;
@@ -284,6 +285,9 @@ mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
agent->invalidate = invalidate;
agent->cleanup = cleaup;
+ if (ctx_update)
+ WRITE_ONCE(*ctx_update, agent);
+
mutex_lock(&hv_vhca->agents_lock);
hv_vhca->agents[type] = agent;
mutex_unlock(&hv_vhca->agents_lock);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
index f240ffe5116c..8b3974cf0ee4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
@@ -43,7 +43,8 @@ mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
void (*invalidate)(struct mlx5_hv_vhca_agent*,
u64 block_mask),
void (*cleanup)(struct mlx5_hv_vhca_agent *agent),
- void *context);
+ void *context,
+ struct mlx5_hv_vhca_agent **ctx_update);
void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent);
int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent,
@@ -84,7 +85,8 @@ mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
void (*invalidate)(struct mlx5_hv_vhca_agent*,
u64 block_mask),
void (*cleanup)(struct mlx5_hv_vhca_agent *agent),
- void *context)
+ void *context,
+ struct mlx5_hv_vhca_agent **ctx_update)
{
return NULL;
}
--
2.44.0