[PATCH] net/ibmvnic: Fix deadlock problem in reset

From: Juliet Kim
Date: Thu Nov 15 2018 - 16:44:57 EST


Subject: [PATCH] net/ibmvnic: Fix deadlock problem in reset

From: Juliet Kim <julietk@xxxxxxxxxxxxxxxxxx>

This patch switches to using only rtnl_lock during a reset, to avoid a
deadlock that could occur when a thread performing a close holds
rtnl_lock and waits for reset_lock, which has been acquired by another
thread that is itself waiting for rtnl_lock in order to set the number
of tx/rx queues during a reset.

Also, we now set the number of tx/rx queues during a soft reset
for failover or LPM events.

Signed-off-by: Juliet Kim <julietk@xxxxxxxxxxxxxxxxxx>
---
 drivers/net/ethernet/ibm/ibmvnic.c |   59 +++++++++++++-----------------------
 drivers/net/ethernet/ibm/ibmvnic.h |    2 +
 2 files changed, 22 insertions(+), 39 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 7893bef..4a5de59 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1103,20 +1103,15 @@ static int ibmvnic_open(struct net_device *netdev)
ÂÂÂÂÂÂÂ return 0;
ÂÂÂ }

-ÂÂ mutex_lock(&adapter->reset_lock);
-
ÂÂÂ if (adapter->state != VNIC_CLOSED) {
ÂÂÂÂÂÂÂ rc = ibmvnic_login(netdev);
-ÂÂÂÂÂÂ if (rc) {
-ÂÂÂÂÂÂÂÂÂÂ mutex_unlock(&adapter->reset_lock);
+ÂÂÂÂÂÂ if (rc)
ÂÂÂÂÂÂÂÂÂÂÂ return rc;
-ÂÂÂÂÂÂ }

ÂÂÂÂÂÂÂ rc = init_resources(adapter);
ÂÂÂÂÂÂÂ if (rc) {
ÂÂÂÂÂÂÂÂÂÂÂ netdev_err(netdev, "failed to initialize resources\n");
ÂÂÂÂÂÂÂÂÂÂÂ release_resources(adapter);
-ÂÂÂÂÂÂÂÂÂÂ mutex_unlock(&adapter->reset_lock);
ÂÂÂÂÂÂÂÂÂÂÂ return rc;
ÂÂÂÂÂÂÂ }
ÂÂÂ }
@@ -1124,8 +1119,6 @@ static int ibmvnic_open(struct net_device *netdev)
ÂÂÂ rc = __ibmvnic_open(netdev);
ÂÂÂ netif_carrier_on(netdev);

-ÂÂ mutex_unlock(&adapter->reset_lock);
-
ÂÂÂ return rc;
Â}

@@ -1269,10 +1262,8 @@ static int ibmvnic_close(struct net_device *netdev)
ÂÂÂÂÂÂÂ return 0;
ÂÂÂ }

-ÂÂ mutex_lock(&adapter->reset_lock);
ÂÂÂ rc = __ibmvnic_close(netdev);
ÂÂÂ ibmvnic_cleanup(netdev);
-ÂÂ mutex_unlock(&adapter->reset_lock);

ÂÂÂ return rc;
Â}
@@ -1820,20 +1811,15 @@ static int do_reset(struct ibmvnic_adapter *adapter,
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ return rc;
ÂÂÂÂÂÂÂ } else if (adapter->req_rx_queues != old_num_rx_queues ||
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂ adapter->req_tx_queues != old_num_tx_queues) {
-ÂÂÂÂÂÂÂÂÂÂ adapter->map_id = 1;
ÂÂÂÂÂÂÂÂÂÂÂ release_rx_pools(adapter);
ÂÂÂÂÂÂÂÂÂÂÂ release_tx_pools(adapter);
-ÂÂÂÂÂÂÂÂÂÂ rc = init_rx_pools(netdev);
-ÂÂÂÂÂÂÂÂÂÂ if (rc)
-ÂÂÂÂÂÂÂÂÂÂÂÂÂÂ return rc;
-ÂÂÂÂÂÂÂÂÂÂ rc = init_tx_pools(netdev);
-ÂÂÂÂÂÂÂÂÂÂ if (rc)
-ÂÂÂÂÂÂÂÂÂÂÂÂÂÂ return rc;
-
ÂÂÂÂÂÂÂÂÂÂÂ release_napi(adapter);
-ÂÂÂÂÂÂÂÂÂÂ rc = init_napi(adapter);
+ÂÂÂÂÂÂÂÂÂÂ release_vpd_data(adapter);
+
+ÂÂÂÂÂÂÂÂÂÂ rc = init_resources(adapter);
ÂÂÂÂÂÂÂÂÂÂÂ if (rc)
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ return rc;
+
ÂÂÂÂÂÂÂ } else {
ÂÂÂÂÂÂÂÂÂÂÂ rc = reset_tx_pools(adapter);
ÂÂÂÂÂÂÂÂÂÂÂ if (rc)
@@ -1917,17 +1903,8 @@ static int do_hard_reset(struct ibmvnic_adapter *adapter,
ÂÂÂÂÂÂÂ adapter->state = VNIC_PROBED;
ÂÂÂÂÂÂÂ return 0;
ÂÂÂ }
-ÂÂ /* netif_set_real_num_xx_queues needs to take rtnl lock here
-ÂÂÂ * unless wait_for_reset is set, in which case the rtnl lock
-ÂÂÂ * has already been taken before initializing the reset
-ÂÂÂ */
-ÂÂ if (!adapter->wait_for_reset) {
-ÂÂÂÂÂÂ rtnl_lock();
-ÂÂÂÂÂÂ rc = init_resources(adapter);
-ÂÂÂÂÂÂ rtnl_unlock();
-ÂÂ } else {
-ÂÂÂÂÂÂ rc = init_resources(adapter);
-ÂÂ }
+
+ÂÂ rc = init_resources(adapter);
ÂÂÂ if (rc)
ÂÂÂÂÂÂÂ return rc;

@@ -1986,13 +1963,21 @@ static void __ibmvnic_reset(struct work_struct *work)
ÂÂÂ struct ibmvnic_rwi *rwi;
ÂÂÂ struct ibmvnic_adapter *adapter;
ÂÂÂ struct net_device *netdev;
+ÂÂ bool we_lock_rtnl = false;
ÂÂÂ u32 reset_state;
ÂÂÂ int rc = 0;

ÂÂÂ adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset);
ÂÂÂ netdev = adapter->netdev;

-ÂÂ mutex_lock(&adapter->reset_lock);
+ÂÂ /* netif_set_real_num_xx_queues needs to take rtnl lock here
+ÂÂÂ * unless wait_for_reset is set, in which case the rtnl lock
+ÂÂÂ * has already been taken before initializing the reset
+ÂÂÂ */
+ÂÂ if (!adapter->wait_for_reset) {
+ÂÂÂÂÂÂ rtnl_lock();
+ÂÂÂÂÂÂ we_lock_rtnl = true;
+ÂÂ }

ÂÂÂ reset_state = adapter->state;


ÂÂÂ rwi = get_next_rwi(adapter);
@@ -2020,12 +2005,11 @@ static void __ibmvnic_reset(struct work_struct *work)
ÂÂÂ if (rc) {
ÂÂÂÂÂÂÂ netdev_dbg(adapter->netdev, "Reset failed\n");
ÂÂÂÂÂÂÂ free_all_rwi(adapter);
-ÂÂÂÂÂÂ mutex_unlock(&adapter->reset_lock);
-ÂÂÂÂÂÂ return;
ÂÂÂ }

ÂÂÂ adapter->resetting = false;
-ÂÂ mutex_unlock(&adapter->reset_lock);
+ÂÂ if (we_lock_rtnl)
+ÂÂÂÂÂÂ rtnl_unlock();
Â}

Âstatic int ibmvnic_reset(struct ibmvnic_adapter *adapter,
@@ -4768,7 +4752,6 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)

ÂÂÂ INIT_WORK(&adapter->ibmvnic_reset, __ibmvnic_reset);
ÂÂÂ INIT_LIST_HEAD(&adapter->rwi_list);
-ÂÂ mutex_init(&adapter->reset_lock);
ÂÂÂ mutex_init(&adapter->rwi_lock);
ÂÂÂ adapter->resetting = false;

@@ -4840,8 +4823,8 @@ static int ibmvnic_remove(struct vio_dev *dev)
ÂÂÂ struct ibmvnic_adapter *adapter = netdev_priv(netdev);

ÂÂÂ adapter->state = VNIC_REMOVING;
-ÂÂ unregister_netdev(netdev);
-ÂÂ mutex_lock(&adapter->reset_lock);
+ÂÂ rtnl_lock();
+ÂÂ unregister_netdevice(netdev);

ÂÂÂ release_resources(adapter);
ÂÂÂ release_sub_crqs(adapter, 1);
@@ -4852,7 +4835,7 @@ static int ibmvnic_remove(struct vio_dev *dev)

ÂÂÂ adapter->state = VNIC_REMOVED;

-ÂÂ mutex_unlock(&adapter->reset_lock);
+ÂÂ rtnl_unlock();
ÂÂÂ device_remove_file(&dev->dev, &dev_attr_failover);
ÂÂÂ free_netdev(netdev);
ÂÂÂ dev_set_drvdata(&dev->dev, NULL);
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 18103b8..99c4f8d 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -1075,7 +1075,7 @@ struct ibmvnic_adapter {
ÂÂÂ struct tasklet_struct tasklet;
ÂÂÂ enum vnic_state state;
ÂÂÂ enum ibmvnic_reset_reason reset_reason;
-ÂÂ struct mutex reset_lock, rwi_lock;
+ÂÂ struct mutex rwi_lock;
ÂÂÂ struct list_head rwi_list;
ÂÂÂ struct work_struct ibmvnic_reset;
ÂÂÂ bool resetting;