Re: [PATCH net] virtio_net: fix xdp_rxq_info bug after suspend/resume

From: Jason Wang
Date: Wed Jun 22 2022 - 04:06:38 EST


On Tue, Jun 21, 2022 at 7:50 PM Stephan Gerhold
<stephan.gerhold@xxxxxxxxxxxxxxx> wrote:
>
> The following sequence currently causes a driver bug warning
> when using virtio_net:
>
> # ip link set eth0 up
> # echo mem > /sys/power/state (or e.g. # rtcwake -s 10 -m mem)
> <resume>
> # ip link set eth0 down
>
> Missing register, driver bug
> WARNING: CPU: 0 PID: 375 at net/core/xdp.c:138 xdp_rxq_info_unreg+0x58/0x60
> Call trace:
> xdp_rxq_info_unreg+0x58/0x60
> virtnet_close+0x58/0xac
> __dev_close_many+0xac/0x140
> __dev_change_flags+0xd8/0x210
> dev_change_flags+0x24/0x64
> do_setlink+0x230/0xdd0
> ...
>
> This happens because virtnet_freeze() frees the receive_queue
> completely (including struct xdp_rxq_info) but does not call
> xdp_rxq_info_unreg(). Similarly, virtnet_restore() sets up the
> receive_queue again but does not call xdp_rxq_info_reg().
>
> Actually, parts of virtnet_freeze_down() and virtnet_restore_up()
> are almost identical to virtnet_close() and virtnet_open(): only
> the calls to xdp_rxq_info_(un)reg() are missing. This means that
> we can fix this easily and avoid such problems in the future by
> just calling virtnet_close()/open() from the freeze/restore handlers.
>
> Aside from adding the missing xdp_rxq_info calls, the only difference
> is that the refill work is now only cancelled if netif_running().
> However, this should not make any functional difference since the
> refill work should only be active if the network interface is
> actually up.
>
> Fixes: 754b8a21a96d ("virtio_net: setup xdp_rxq_info")
> Signed-off-by: Stephan Gerhold <stephan.gerhold@xxxxxxxxxxxxxxx>

Acked-by: Jason Wang <jasowang@xxxxxxxxxx>

> ---
> drivers/net/virtio_net.c | 25 ++++++-------------------
> 1 file changed, 6 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index db05b5e930be..969a67970e71 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -2768,7 +2768,6 @@ static const struct ethtool_ops virtnet_ethtool_ops = {
> static void virtnet_freeze_down(struct virtio_device *vdev)
> {
> struct virtnet_info *vi = vdev->priv;
> - int i;
>
> /* Make sure no work handler is accessing the device */
> flush_work(&vi->config_work);
> @@ -2776,14 +2775,8 @@ static void virtnet_freeze_down(struct virtio_device *vdev)
> netif_tx_lock_bh(vi->dev);
> netif_device_detach(vi->dev);
> netif_tx_unlock_bh(vi->dev);
> - cancel_delayed_work_sync(&vi->refill);
> -
> - if (netif_running(vi->dev)) {
> - for (i = 0; i < vi->max_queue_pairs; i++) {
> - napi_disable(&vi->rq[i].napi);
> - virtnet_napi_tx_disable(&vi->sq[i].napi);
> - }
> - }
> + if (netif_running(vi->dev))
> + virtnet_close(vi->dev);
> }
>
> static int init_vqs(struct virtnet_info *vi);
> @@ -2791,7 +2784,7 @@ static int init_vqs(struct virtnet_info *vi);
> static int virtnet_restore_up(struct virtio_device *vdev)
> {
> struct virtnet_info *vi = vdev->priv;
> - int err, i;
> + int err;
>
> err = init_vqs(vi);
> if (err)
> @@ -2800,15 +2793,9 @@ static int virtnet_restore_up(struct virtio_device *vdev)
> virtio_device_ready(vdev);
>
> if (netif_running(vi->dev)) {
> - for (i = 0; i < vi->curr_queue_pairs; i++)
> - if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
> - schedule_delayed_work(&vi->refill, 0);
> -
> - for (i = 0; i < vi->max_queue_pairs; i++) {
> - virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
> - virtnet_napi_tx_enable(vi, vi->sq[i].vq,
> - &vi->sq[i].napi);
> - }
> + err = virtnet_open(vi->dev);
> + if (err)
> + return err;
> }
>
> netif_tx_lock_bh(vi->dev);
> --
> 2.30.2
>