Re: [PATCH] virtio-net: hold netdev_lock when pausing rx
From: Michael S. Tsirkin
Date: Thu Apr 10 2025 - 04:00:02 EST
On Thu, Apr 10, 2025 at 02:05:57PM +0700, Bui Quang Minh wrote:
> When pausing rx (e.g. set up xdp, xsk pool, rx resize), we call
> napi_disable() on the receive queue's napi. In delayed refill_work, it
> also calls napi_disable() on the receive queue's napi. When
> napi_disable() is called on an already disabled napi, it will sleep in
> napi_disable_locked while still holding the netdev_lock. As a result,
> later napi_enable gets stuck too as it cannot acquire the netdev_lock.
> This leads to refill_work and the pause-then-resume tx are stuck
> altogether.
>
> This scenario can be reproducible by binding a XDP socket to virtio-net
> interface without setting up the fill ring. As a result, try_fill_recv
> will fail until the fill ring is set up and refill_work is scheduled.
>
> This commit makes the pausing rx path hold the netdev_lock until
> resuming, prevent any napi_disable() to be called on a temporarily
> disabled napi.
>
> Fixes: 413f0271f396 ("net: protect NAPI enablement with netdev_lock()")
> Signed-off-by: Bui Quang Minh <minhquangbui99@xxxxxxxxx>
> ---
> drivers/net/virtio_net.c | 74 +++++++++++++++++++++++++---------------
> 1 file changed, 47 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 7e4617216a4b..74bd1065c586 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -2786,9 +2786,13 @@ static void skb_recv_done(struct virtqueue *rvq)
> }
>
> static void virtnet_napi_do_enable(struct virtqueue *vq,
> - struct napi_struct *napi)
> + struct napi_struct *napi,
> + bool netdev_locked)
> {
> - napi_enable(napi);
> + if (netdev_locked)
> + napi_enable_locked(napi);
> + else
> + napi_enable(napi);
>
> /* If all buffers were filled by other side before we napi_enabled, we
> * won't get another interrupt, so process any outstanding packets now.
> @@ -2799,16 +2803,16 @@ static void virtnet_napi_do_enable(struct virtqueue
> *vq,
Your patch is line-wrapped, unfortunately. Here and elsewhere.
> local_bh_enable();
> }
>
> -static void virtnet_napi_enable(struct receive_queue *rq)
> +static void virtnet_napi_enable(struct receive_queue *rq, bool
> netdev_locked)
> {
> struct virtnet_info *vi = rq->vq->vdev->priv;
> int qidx = vq2rxq(rq->vq);
>
> - virtnet_napi_do_enable(rq->vq, &rq->napi);
> + virtnet_napi_do_enable(rq->vq, &rq->napi, netdev_locked);
> netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, &rq->napi);
> }
>
> -static void virtnet_napi_tx_enable(struct send_queue *sq)
> +static void virtnet_napi_tx_enable(struct send_queue *sq, bool
> netdev_locked)
> {
> struct virtnet_info *vi = sq->vq->vdev->priv;
> struct napi_struct *napi = &sq->napi;
> @@ -2825,11 +2829,11 @@ static void virtnet_napi_tx_enable(struct send_queue
> *sq)
> return;
> }
>
> - virtnet_napi_do_enable(sq->vq, napi);
> + virtnet_napi_do_enable(sq->vq, napi, netdev_locked);
> netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, napi);
> }
>
> -static void virtnet_napi_tx_disable(struct send_queue *sq)
> +static void virtnet_napi_tx_disable(struct send_queue *sq, bool
> netdev_locked)
> {
> struct virtnet_info *vi = sq->vq->vdev->priv;
> struct napi_struct *napi = &sq->napi;
> @@ -2837,18 +2841,24 @@ static void virtnet_napi_tx_disable(struct
> send_queue *sq)
>
> if (napi->weight) {
> netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, NULL);
> - napi_disable(napi);
> + if (netdev_locked)
> + napi_disable_locked(napi);
> + else
> + napi_disable(napi);
> }
> }
>
> -static void virtnet_napi_disable(struct receive_queue *rq)
> +static void virtnet_napi_disable(struct receive_queue *rq, bool
> netdev_locked)
> {
> struct virtnet_info *vi = rq->vq->vdev->priv;
> struct napi_struct *napi = &rq->napi;
> int qidx = vq2rxq(rq->vq);
>
> netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, NULL);
> - napi_disable(napi);
> + if (netdev_locked)
> + napi_disable_locked(napi);
> + else
> + napi_disable(napi);
> }
>
> static void refill_work(struct work_struct *work)
> @@ -2875,9 +2885,11 @@ static void refill_work(struct work_struct *work)
> * instance lock)
> * - check netif_running() and return early to avoid a race
> */
> - napi_disable(&rq->napi);
> + netdev_lock(vi->dev);
> + napi_disable_locked(&rq->napi);
> still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
This does mean netdev_lock is held potentially for a long while,
while try_fill_recv and processing inside virtnet_napi_do_enable
finish. Better ideas?
> - virtnet_napi_do_enable(rq->vq, &rq->napi);
> + virtnet_napi_do_enable(rq->vq, &rq->napi, true);
> + netdev_unlock(vi->dev);
>
> /* In theory, this can happen: if we don't get any buffers in
> * we will *never* try to fill again.
> @@ -3074,8 +3086,8 @@ static int virtnet_poll(struct napi_struct *napi, int
> budget)
>
> static void virtnet_disable_queue_pair(struct virtnet_info *vi, int
> qp_index)
> {
> - virtnet_napi_tx_disable(&vi->sq[qp_index]);
> - virtnet_napi_disable(&vi->rq[qp_index]);
> + virtnet_napi_tx_disable(&vi->sq[qp_index], false);
> + virtnet_napi_disable(&vi->rq[qp_index], false);
> xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq);
> }
>
> @@ -3094,8 +3106,8 @@ static int virtnet_enable_queue_pair(struct
> virtnet_info *vi, int qp_index)
> if (err < 0)
> goto err_xdp_reg_mem_model;
>
> - virtnet_napi_enable(&vi->rq[qp_index]);
> - virtnet_napi_tx_enable(&vi->sq[qp_index]);
> + virtnet_napi_enable(&vi->rq[qp_index], false);
> + virtnet_napi_tx_enable(&vi->sq[qp_index], false);
>
> return 0;
>
> @@ -3347,7 +3359,8 @@ static void virtnet_rx_pause(struct virtnet_info *vi,
> struct receive_queue *rq)
> bool running = netif_running(vi->dev);
>
> if (running) {
> - virtnet_napi_disable(rq);
> + netdev_lock(vi->dev);
> + virtnet_napi_disable(rq, true);
> virtnet_cancel_dim(vi, &rq->dim);
> }
> }
> @@ -3359,8 +3372,10 @@ static void virtnet_rx_resume(struct virtnet_info
> *vi, struct receive_queue *rq)
> if (!try_fill_recv(vi, rq, GFP_KERNEL))
> schedule_delayed_work(&vi->refill, 0);
>
> - if (running)
> - virtnet_napi_enable(rq);
> + if (running) {
> + virtnet_napi_enable(rq, true);
> + netdev_unlock(vi->dev);
> + }
> }
>
> static int virtnet_rx_resize(struct virtnet_info *vi,
> @@ -3389,7 +3404,7 @@ static void virtnet_tx_pause(struct virtnet_info *vi,
> struct send_queue *sq)
> qindex = sq - vi->sq;
>
> if (running)
> - virtnet_napi_tx_disable(sq);
> + virtnet_napi_tx_disable(sq, false);
>
> txq = netdev_get_tx_queue(vi->dev, qindex);
>
> @@ -3423,7 +3438,7 @@ static void virtnet_tx_resume(struct virtnet_info *vi,
> struct send_queue *sq)
> __netif_tx_unlock_bh(txq);
>
> if (running)
> - virtnet_napi_tx_enable(sq);
> + virtnet_napi_tx_enable(sq, false);
> }
>
> static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue
> *sq,
> @@ -5961,9 +5976,10 @@ static int virtnet_xdp_set(struct net_device *dev,
> struct bpf_prog *prog,
>
> /* Make sure NAPI is not using any XDP TX queues for RX. */
> if (netif_running(dev)) {
> + netdev_lock(dev);
> for (i = 0; i < vi->max_queue_pairs; i++) {
> - virtnet_napi_disable(&vi->rq[i]);
> - virtnet_napi_tx_disable(&vi->sq[i]);
> + virtnet_napi_disable(&vi->rq[i], true);
> + virtnet_napi_tx_disable(&vi->sq[i], true);
> }
> }
>
> @@ -6000,11 +6016,14 @@ static int virtnet_xdp_set(struct net_device *dev,
> struct bpf_prog *prog,
> if (old_prog)
> bpf_prog_put(old_prog);
> if (netif_running(dev)) {
> - virtnet_napi_enable(&vi->rq[i]);
> - virtnet_napi_tx_enable(&vi->sq[i]);
> + virtnet_napi_enable(&vi->rq[i], true);
> + virtnet_napi_tx_enable(&vi->sq[i], true);
> }
> }
>
> + if (netif_running(dev))
> + netdev_unlock(dev);
> +
> return 0;
>
> err:
> @@ -6016,9 +6035,10 @@ static int virtnet_xdp_set(struct net_device *dev,
> struct bpf_prog *prog,
>
> if (netif_running(dev)) {
> for (i = 0; i < vi->max_queue_pairs; i++) {
> - virtnet_napi_enable(&vi->rq[i]);
> - virtnet_napi_tx_enable(&vi->sq[i]);
> + virtnet_napi_enable(&vi->rq[i], true);
> + virtnet_napi_tx_enable(&vi->sq[i], true);
> }
> + netdev_unlock(dev);
> }
> if (prog)
> bpf_prog_sub(prog, vi->max_queue_pairs - 1);
> --
> 2.43.0
>