[PATCH 5.10 41/70] net/mlx5: Avoid double clear or set of sync reset requested

From: Greg Kroah-Hartman
Date: Tue May 10 2022 - 09:56:34 EST


From: Moshe Shemesh <moshe@xxxxxxxxxx>

commit fc3d3db07b35885f238e1fa06b9f04a8fa7a62d0 upstream.

Double clear of reset requested state can lead to NULL pointer as it
will try to delete the timer twice. This can happen for example on a
race between abort from FW and pci error or reset. Avoid such case using
test_and_clear_bit() to verify only one time reset requested state clear
flow. Similarly use test_and_set_bit() to verify only one time reset
requested state set flow.

Fixes: 7dd6df329d4c ("net/mlx5: Handle sync reset abort event")
Signed-off-by: Moshe Shemesh <moshe@xxxxxxxxxx>
Reviewed-by: Maher Sanalla <msanalla@xxxxxxxxxx>
Reviewed-by: Shay Drory <shayd@xxxxxxxxxx>
Signed-off-by: Saeed Mahameed <saeedm@xxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
---
drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c | 28 ++++++++++++++-------
1 file changed, 19 insertions(+), 9 deletions(-)

--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
@@ -134,14 +134,19 @@ static void mlx5_stop_sync_reset_poll(st
del_timer_sync(&fw_reset->timer);
}

-static void mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health)
+static int mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health)
{
struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;

+ if (!test_and_clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) {
+ mlx5_core_warn(dev, "Reset request was already cleared\n");
+ return -EALREADY;
+ }
+
mlx5_stop_sync_reset_poll(dev);
- clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags);
if (poll_health)
mlx5_start_health_poll(dev);
+ return 0;
}

#define MLX5_RESET_POLL_INTERVAL (HZ / 10)
@@ -185,13 +190,17 @@ static int mlx5_fw_reset_set_reset_sync_
return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, 0, 2, false);
}

-static void mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev)
+static int mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev)
{
struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;

+ if (test_and_set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) {
+ mlx5_core_warn(dev, "Reset request was already set\n");
+ return -EALREADY;
+ }
mlx5_stop_health_poll(dev, true);
- set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags);
mlx5_start_sync_reset_poll(dev);
+ return 0;
}

static void mlx5_fw_live_patch_event(struct work_struct *work)
@@ -225,7 +234,9 @@ static void mlx5_sync_reset_request_even
err ? "Failed" : "Sent");
return;
}
- mlx5_sync_reset_set_reset_requested(dev);
+ if (mlx5_sync_reset_set_reset_requested(dev))
+ return;
+
err = mlx5_fw_reset_set_reset_sync_ack(dev);
if (err)
mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack Failed. Error code: %d\n", err);
@@ -325,7 +336,8 @@ static void mlx5_sync_reset_now_event(st
struct mlx5_core_dev *dev = fw_reset->dev;
int err;

- mlx5_sync_reset_clear_reset_requested(dev, false);
+ if (mlx5_sync_reset_clear_reset_requested(dev, false))
+ return;

mlx5_core_warn(dev, "Sync Reset now. Device is going to reset.\n");

@@ -354,10 +366,8 @@ static void mlx5_sync_reset_abort_event(
reset_abort_work);
struct mlx5_core_dev *dev = fw_reset->dev;

- if (!test_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags))
+ if (mlx5_sync_reset_clear_reset_requested(dev, true))
return;
-
- mlx5_sync_reset_clear_reset_requested(dev, true);
mlx5_core_warn(dev, "PCI Sync FW Update Reset Aborted.\n");
}