Re: [PATCH net v2] lan743x: fix rx_napi_poll/interrupt ping-pong
From: Heiner Kallweit
Date: Thu Dec 10 2020 - 02:33:22 EST
Am 10.12.2020 um 04:55 schrieb Sven Van Asbroeck:
> From: Sven Van Asbroeck <thesven73@xxxxxxxxx>
>
> Even if there is more rx data waiting on the chip, the rx napi poll fn
> will never run more than once - it will always read a few buffers, then
> bail out and re-arm interrupts. Which results in ping-pong between napi
> and interrupt.
>
> This defeats the purpose of napi, and is bad for performance.
>
> Fix by making the rx napi poll behave identically to other ethernet
> drivers:
> 1. initialize rx napi polling with an arbitrary budget (64).
> 2. in the polling fn, return full weight if rx queue is not depleted,
> this tells the napi core to "keep polling".
> 3. update the rx tail ("ring the doorbell") once for every 8 processed
> rx ring buffers.
>
> Thanks to Jakub Kicinski, Eric Dumazet and Andrew Lunn for their expert
> opinions and suggestions.
>
> Tested with 20 seconds of full bandwidth receive (iperf3):
> rx irqs softirqs(NET_RX)
> -----------------------------
> before 23827 33620
> after 129 4081
>
In addition you could play with sysfs attributes
/sys/class/net/<if>/gro_flush_timeout
/sys/class/net/<if>/napi_defer_hard_irqs
> Tested-by: Sven Van Asbroeck <thesven73@xxxxxxxxx> # lan7430
> Fixes: 23f0703c125be ("lan743x: Add main source files for new lan743x driver")
> Signed-off-by: Sven Van Asbroeck <thesven73@xxxxxxxxx>
> ---
>
> Tree: git://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git # b7e4ba9a91df
>
> To: Bryan Whitehead <bryan.whitehead@xxxxxxxxxxxxx>
> To: Microchip Linux Driver Support <UNGLinuxDriver@xxxxxxxxxxxxx>
> To: "David S. Miller" <davem@xxxxxxxxxxxxx>
> To: Jakub Kicinski <kuba@xxxxxxxxxx>
> Cc: Andrew Lunn <andrew@xxxxxxx>
> Cc: netdev@xxxxxxxxxxxxxxx
> Cc: linux-kernel@xxxxxxxxxxxxxxx
>
> drivers/net/ethernet/microchip/lan743x_main.c | 44 ++++++++++---------
> 1 file changed, 23 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
> index 87b6c59a1e03..30ec308b9a4c 100644
> --- a/drivers/net/ethernet/microchip/lan743x_main.c
> +++ b/drivers/net/ethernet/microchip/lan743x_main.c
> @@ -1964,6 +1964,14 @@ static struct sk_buff *lan743x_rx_allocate_skb(struct lan743x_rx *rx)
> length, GFP_ATOMIC | GFP_DMA);
> }
>
> +static void lan743x_rx_update_tail(struct lan743x_rx *rx, int index)
> +{
> + /* update the tail once per 8 descriptors */
> + if ((index & 7) == 7)
> + lan743x_csr_write(rx->adapter, RX_TAIL(rx->channel_number),
> + index);
> +}
> +
> static int lan743x_rx_init_ring_element(struct lan743x_rx *rx, int index,
> struct sk_buff *skb)
> {
> @@ -1994,6 +2002,7 @@ static int lan743x_rx_init_ring_element(struct lan743x_rx *rx, int index,
> descriptor->data0 = (RX_DESC_DATA0_OWN_ |
> (length & RX_DESC_DATA0_BUF_LENGTH_MASK_));
> skb_reserve(buffer_info->skb, RX_HEAD_PADDING);
> + lan743x_rx_update_tail(rx, index);
>
> return 0;
> }
> @@ -2012,6 +2021,7 @@ static void lan743x_rx_reuse_ring_element(struct lan743x_rx *rx, int index)
> descriptor->data0 = (RX_DESC_DATA0_OWN_ |
> ((buffer_info->buffer_length) &
> RX_DESC_DATA0_BUF_LENGTH_MASK_));
> + lan743x_rx_update_tail(rx, index);
> }
>
> static void lan743x_rx_release_ring_element(struct lan743x_rx *rx, int index)
> @@ -2223,34 +2233,26 @@ static int lan743x_rx_napi_poll(struct napi_struct *napi, int weight)
> struct lan743x_rx *rx = container_of(napi, struct lan743x_rx, napi);
> struct lan743x_adapter *adapter = rx->adapter;
> u32 rx_tail_flags = 0;
> - int count;
> + int count, result;
>
> if (rx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_W2C) {
> /* clear int status bit before reading packet */
> lan743x_csr_write(adapter, DMAC_INT_STS,
> DMAC_INT_BIT_RXFRM_(rx->channel_number));
> }
> - count = 0;
> - while (count < weight) {
> - int rx_process_result = lan743x_rx_process_packet(rx);
> -
> - if (rx_process_result == RX_PROCESS_RESULT_PACKET_RECEIVED) {
> - count++;
> - } else if (rx_process_result ==
> - RX_PROCESS_RESULT_NOTHING_TO_DO) {
> + for (count = 0; count < weight; count++) {
> + result = lan743x_rx_process_packet(rx);
> + if (result == RX_PROCESS_RESULT_NOTHING_TO_DO)
> break;
> - } else if (rx_process_result ==
> - RX_PROCESS_RESULT_PACKET_DROPPED) {
> - continue;
> - }
> }
> rx->frame_count += count;
> - if (count == weight)
> - goto done;
> + if (count == weight || result == RX_PROCESS_RESULT_PACKET_RECEIVED)
> + return weight;
>
> if (!napi_complete_done(napi, count))
> - goto done;
> + return count;
>
> + /* re-arm interrupts, must write to rx tail on some chip variants */
> if (rx->vector_flags & LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_SET)
> rx_tail_flags |= RX_TAIL_SET_TOP_INT_VEC_EN_;
> if (rx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_SET) {
> @@ -2260,10 +2262,10 @@ static int lan743x_rx_napi_poll(struct napi_struct *napi, int weight)
> INT_BIT_DMA_RX_(rx->channel_number));
> }
>
> - /* update RX_TAIL */
> - lan743x_csr_write(adapter, RX_TAIL(rx->channel_number),
> - rx_tail_flags | rx->last_tail);
> -done:
> + if (rx_tail_flags)
> + lan743x_csr_write(adapter, RX_TAIL(rx->channel_number),
> + rx_tail_flags | rx->last_tail);
> +
> return count;
> }
>
> @@ -2407,7 +2409,7 @@ static int lan743x_rx_open(struct lan743x_rx *rx)
>
> netif_napi_add(adapter->netdev,
> &rx->napi, lan743x_rx_napi_poll,
> - rx->ring_size - 1);
> + 64);
This value isn't completely arbitrary.
Better use constant NAPI_POLL_WEIGHT.
>
> lan743x_csr_write(adapter, DMAC_CMD,
> DMAC_CMD_RX_SWR_(rx->channel_number));
>