[PATCH 4.14 052/138] net/mlx5: Fix race for multiple RoCE enable

From: Greg Kroah-Hartman
Date: Tue Apr 10 2018 - 18:56:44 EST


4.14-stable review patch. If anyone has any objections, please let me know.

------------------

From: Daniel Jurgens <danielj@xxxxxxxxxxxx>


[ Upstream commit 734dc065fc41f6143ff88225aa5d335cb1e0f6aa ]

There are two potential problems with the existing implementation.

1. Enable and disable can race after the atomic operations.
2. If a command fails the refcount is left in an inconsistent state.

Introduce a lock and perform error checking.

Fixes: a6f7d2aff623 ("net/mlx5: Add support for multiple RoCE enable")
Signed-off-by: Daniel Jurgens <danielj@xxxxxxxxxxxx>
Reviewed-by: Parav Pandit <parav@xxxxxxxxxxxx>
Signed-off-by: Leon Romanovsky <leon@xxxxxxxxxx>
Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxxxx>
Signed-off-by: Sasha Levin <alexander.levin@xxxxxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
---
drivers/net/ethernet/mellanox/mlx5/core/vport.c | 33 +++++++++++++++++++-----
include/linux/mlx5/driver.h | 2 -
2 files changed, 28 insertions(+), 7 deletions(-)

--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -36,6 +36,9 @@
#include <linux/mlx5/vport.h>
#include "mlx5_core.h"

+/* Mutex to hold while enabling or disabling RoCE */
+static DEFINE_MUTEX(mlx5_roce_en_lock);
+
static int _mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod,
u16 vport, u32 *out, int outlen)
{
@@ -998,17 +1001,35 @@ static int mlx5_nic_vport_update_roce_st

int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev)
{
- if (atomic_inc_return(&mdev->roce.roce_en) != 1)
- return 0;
- return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_ENABLED);
+ int err = 0;
+
+ mutex_lock(&mlx5_roce_en_lock);
+ if (!mdev->roce.roce_en)
+ err = mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_ENABLED);
+
+ if (!err)
+ mdev->roce.roce_en++;
+ mutex_unlock(&mlx5_roce_en_lock);
+
+ return err;
}
EXPORT_SYMBOL_GPL(mlx5_nic_vport_enable_roce);

int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev)
{
- if (atomic_dec_return(&mdev->roce.roce_en) != 0)
- return 0;
- return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED);
+ int err = 0;
+
+ mutex_lock(&mlx5_roce_en_lock);
+ if (mdev->roce.roce_en) {
+ mdev->roce.roce_en--;
+ if (mdev->roce.roce_en == 0)
+ err = mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED);
+
+ if (err)
+ mdev->roce.roce_en++;
+ }
+ mutex_unlock(&mlx5_roce_en_lock);
+ return err;
}
EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce);

--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -794,7 +794,7 @@ struct mlx5_core_dev {
struct mlx5e_resources mlx5e_res;
struct {
struct mlx5_rsvd_gids reserved_gids;
- atomic_t roce_en;
+ u32 roce_en;
} roce;
#ifdef CONFIG_MLX5_FPGA
struct mlx5_fpga_device *fpga;