[PATCH 4.4 083/312] net/mlx5: Fix potential deadlock in command mode change

From: Greg Kroah-Hartman
Date: Fri May 08 2020 - 08:39:44 EST


From: Mohamad Haj Yahia <mohamad@xxxxxxxxxxxx>

commit 9cba4ebcf374c3772f6eb61f2d065294b2451b49 upstream.

Call command completion handler in case of timeout when working in
interrupts mode.
Avoid flushing the commands workqueue after acquiring the semaphores to
prevent a potential deadlock.

Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters')
Signed-off-by: Mohamad Haj Yahia <mohamad@xxxxxxxxxxxx>
Signed-off-by: Saeed Mahameed <saeedm@xxxxxxxxxxxx>
Signed-off-by: David S. Miller <davem@xxxxxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>

---
drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 79 ++++++++++----------------
1 file changed, 33 insertions(+), 46 deletions(-)

--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -750,13 +750,13 @@ static int wait_func(struct mlx5_core_de

if (cmd->mode == CMD_MODE_POLLING) {
wait_for_completion(&ent->done);
- err = ent->ret;
- } else {
- if (!wait_for_completion_timeout(&ent->done, timeout))
- err = -ETIMEDOUT;
- else
- err = 0;
+ } else if (!wait_for_completion_timeout(&ent->done, timeout)) {
+ ent->ret = -ETIMEDOUT;
+ mlx5_cmd_comp_handler(dev, 1UL << ent->idx);
}
+
+ err = ent->ret;
+
if (err == -ETIMEDOUT) {
mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n",
mlx5_command_str(msg_to_opcode(ent->in)),
@@ -817,28 +817,26 @@ static int mlx5_cmd_invoke(struct mlx5_c
goto out_free;
}

- if (!callback) {
- err = wait_func(dev, ent);
- if (err == -ETIMEDOUT)
- goto out;
-
- ds = ent->ts2 - ent->ts1;
- op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode);
- if (op < ARRAY_SIZE(cmd->stats)) {
- stats = &cmd->stats[op];
- spin_lock_irq(&stats->lock);
- stats->sum += ds;
- ++stats->n;
- spin_unlock_irq(&stats->lock);
- }
- mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME,
- "fw exec time for %s is %lld nsec\n",
- mlx5_command_str(op), ds);
- *status = ent->status;
- free_cmd(ent);
- }
+ if (callback)
+ goto out;

- return err;
+ err = wait_func(dev, ent);
+ if (err == -ETIMEDOUT)
+ goto out_free;
+
+ ds = ent->ts2 - ent->ts1;
+ op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode);
+ if (op < ARRAY_SIZE(cmd->stats)) {
+ stats = &cmd->stats[op];
+ spin_lock_irq(&stats->lock);
+ stats->sum += ds;
+ ++stats->n;
+ spin_unlock_irq(&stats->lock);
+ }
+ mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME,
+ "fw exec time for %s is %lld nsec\n",
+ mlx5_command_str(op), ds);
+ *status = ent->status;

out_free:
free_cmd(ent);
@@ -1230,41 +1228,30 @@ err_dbg:
return err;
}

-void mlx5_cmd_use_events(struct mlx5_core_dev *dev)
+static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode)
{
struct mlx5_cmd *cmd = &dev->cmd;
int i;

for (i = 0; i < cmd->max_reg_cmds; i++)
down(&cmd->sem);
-
down(&cmd->pages_sem);

- flush_workqueue(cmd->wq);
-
- cmd->mode = CMD_MODE_EVENTS;
+ cmd->mode = mode;

up(&cmd->pages_sem);
for (i = 0; i < cmd->max_reg_cmds; i++)
up(&cmd->sem);
}

-void mlx5_cmd_use_polling(struct mlx5_core_dev *dev)
+void mlx5_cmd_use_events(struct mlx5_core_dev *dev)
{
- struct mlx5_cmd *cmd = &dev->cmd;
- int i;
-
- for (i = 0; i < cmd->max_reg_cmds; i++)
- down(&cmd->sem);
-
- down(&cmd->pages_sem);
-
- flush_workqueue(cmd->wq);
- cmd->mode = CMD_MODE_POLLING;
+ mlx5_cmd_change_mod(dev, CMD_MODE_EVENTS);
+}

- up(&cmd->pages_sem);
- for (i = 0; i < cmd->max_reg_cmds; i++)
- up(&cmd->sem);
+void mlx5_cmd_use_polling(struct mlx5_core_dev *dev)
+{
+ mlx5_cmd_change_mod(dev, CMD_MODE_POLLING);
}

static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)