[PATCH V2 net 1/3] net: hns3: fix for TX queue not restarted problem

From: Huazhong Tan
Date: Wed Dec 04 2019 - 21:12:32 EST


From: Yunsheng Lin <linyunsheng@xxxxxxxxxx>

There is a timing window between the ring_space check and
netif_stop_subqueue when transmitting an SKB, and the TX BD
cleaning may be executed during that window, which may
cause the TX queue to never be restarted.

This patch fixes it by rechecking ring_space after
netif_stop_subqueue to make sure the TX queue is restarted.

Also, ring->next_to_clean is updated even when pkts is
zero, because all the TX BDs cleaned may be non-SKB ones, so
hns3_clean_tx_ring still needs to check whether the TX queue
needs to be restarted in that case.

Fixes: 76ad4f0ee747 ("net: hns3: Add support of HNS3 Ethernet Driver for hip08 SoC")
Signed-off-by: Yunsheng Lin <linyunsheng@xxxxxxxxxx>
Signed-off-by: Huazhong Tan <tanhuazhong@xxxxxxxxxx>
---
drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 33 ++++++++++++++++---------
1 file changed, 21 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index ba05368..e273031 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -1287,8 +1287,10 @@ static bool hns3_skb_need_linearized(struct sk_buff *skb, unsigned int *bd_size,
}

static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring,
+ struct net_device *netdev,
struct sk_buff **out_skb)
{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
unsigned int bd_size[HNS3_MAX_TSO_BD_NUM + 1U];
struct sk_buff *skb = *out_skb;
unsigned int bd_num;
@@ -1320,10 +1322,23 @@ static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring,
}

out:
- if (unlikely(ring_space(ring) < bd_num))
- return -EBUSY;
+ if (likely(ring_space(ring) >= bd_num))
+ return bd_num;

- return bd_num;
+ netif_stop_subqueue(netdev, ring->queue_index);
+ smp_mb(); /* Memory barrier before checking ring_space */
+
+ /* Start queue in case hns3_clean_tx_ring has just made room
+ * available and has not seen the queue stopped state performed
+ * by netif_stop_subqueue above.
+ */
+ if (ring_space(ring) >= bd_num && netif_carrier_ok(netdev) &&
+ !test_bit(HNS3_NIC_STATE_DOWN, &priv->state)) {
+ netif_start_subqueue(netdev, ring->queue_index);
+ return bd_num;
+ }
+
+ return -EBUSY;
}

static void hns3_clear_desc(struct hns3_enet_ring *ring, int next_to_use_orig)
@@ -1400,13 +1415,13 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
/* Prefetch the data used later */
prefetch(skb->data);

- ret = hns3_nic_maybe_stop_tx(ring, &skb);
+ ret = hns3_nic_maybe_stop_tx(ring, netdev, &skb);
if (unlikely(ret <= 0)) {
if (ret == -EBUSY) {
u64_stats_update_begin(&ring->syncp);
ring->stats.tx_busy++;
u64_stats_update_end(&ring->syncp);
- goto out_net_tx_busy;
+ return NETDEV_TX_BUSY;
} else if (ret == -ENOMEM) {
u64_stats_update_begin(&ring->syncp);
ring->stats.sw_err_cnt++;
@@ -1457,12 +1472,6 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
out_err_tx_ok:
dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
-
-out_net_tx_busy:
- netif_stop_subqueue(netdev, ring->queue_index);
- smp_mb(); /* Commit all data before submit */
-
- return NETDEV_TX_BUSY;
}

static int hns3_nic_net_set_mac_address(struct net_device *netdev, void *p)
@@ -2519,7 +2528,7 @@ void hns3_clean_tx_ring(struct hns3_enet_ring *ring)
dev_queue = netdev_get_tx_queue(netdev, ring->tqp->tqp_index);
netdev_tx_completed_queue(dev_queue, pkts, bytes);

- if (unlikely(pkts && netif_carrier_ok(netdev) &&
+ if (unlikely(netif_carrier_ok(netdev) &&
ring_space(ring) > HNS3_MAX_TSO_BD_NUM)) {
/* Make sure that anybody stopping the queue after this
* sees the new next_to_clean.
--
2.7.4