Re: [PATCH V4 1/2] virtio-net: fix the set affinity bug when CPUIDs are not consecutive

From: Jason Wang
Date: Fri Jan 11 2013 - 03:44:19 EST


On 01/11/2013 01:56 PM, Wanlong Gao wrote:
> As Michael mentioned, set affinity and select queue will not work very
> well when CPU IDs are not consecutive, this can happen with hot unplug.
> Fix this bug by traversal the online CPUs, and create a per cpu variable
> to find the mapping from CPU to the preferable virtual-queue.

Looks like ixgbe is trying to handling a similar issue recently by
trying to reuse XPS when the channels and cpus are not equal. I wonder
whether we can use the same method.
>
> Cc: Rusty Russell <rusty@xxxxxxxxxxxxxxx>
> Cc: "Michael S. Tsirkin" <mst@xxxxxxxxxx>
> Cc: Jason Wang <jasowang@xxxxxxxxxx>
> Cc: Eric Dumazet <erdnetdev@xxxxxxxxx>
> Cc: virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx
> Cc: netdev@xxxxxxxxxxxxxxx
> Signed-off-by: Wanlong Gao <gaowanlong@xxxxxxxxxxxxxx>
> ---
> V3->V4:
> move vq_index into virtnet_info (Jason)
> change the mapping value when not setting affinity (Jason)
> address the comments about select_queue (Rusty)
> Not Addressed yet:
> race between select_queue and set_channels need discuss more
> but not the same problem with this (Rusty)
>
> drivers/net/virtio_net.c | 53 ++++++++++++++++++++++++++++++++++++++----------
> 1 file changed, 42 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index a6fcf15..ca17a58 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -123,6 +123,9 @@ struct virtnet_info {
>
> /* Does the affinity hint is set for virtqueues? */
> bool affinity_hint_set;
> +
> + /* Per-cpu variable to show the mapping from CPU to virtqueue */
> + int __percpu *vq_index;
> };
>
> struct skb_vnet_hdr {
> @@ -1016,6 +1019,7 @@ static int virtnet_vlan_rx_kill_vid(struct net_device *dev, u16 vid)
> static void virtnet_set_affinity(struct virtnet_info *vi, bool set)
> {
> int i;
> + int cpu;
>
> /* In multiqueue mode, when the number of cpu is equal to the number of
> * queue pairs, we let the queue pairs to be private to one cpu by
> @@ -1029,16 +1033,29 @@ static void virtnet_set_affinity(struct virtnet_info *vi, bool set)
> return;
> }
>
> - for (i = 0; i < vi->max_queue_pairs; i++) {
> - int cpu = set ? i : -1;
> - virtqueue_set_affinity(vi->rq[i].vq, cpu);
> - virtqueue_set_affinity(vi->sq[i].vq, cpu);
> - }
> + if (set) {
> + i = 0;
> + for_each_online_cpu(cpu) {
> + virtqueue_set_affinity(vi->rq[i].vq, cpu);
> + virtqueue_set_affinity(vi->sq[i].vq, cpu);
> + *per_cpu_ptr(vi->vq_index, cpu) = i;
> + i++;
> + }
>
> - if (set)
> vi->affinity_hint_set = true;
> - else
> + } else {
> + for(i = 0; i < vi->max_queue_pairs; i++) {
> + virtqueue_set_affinity(vi->rq[i].vq, -1);
> + virtqueue_set_affinity(vi->sq[i].vq, -1);
> + }
> +
> + i = 0;
> + for_each_online_cpu(cpu)
> + *per_cpu_ptr(vi->vq_index, cpu) =
> + ++i % vi->curr_queue_pairs;
> +
> vi->affinity_hint_set = false;
> + }
> }
>
> static void virtnet_get_ringparam(struct net_device *dev,
> @@ -1127,12 +1144,19 @@ static int virtnet_change_mtu(struct net_device *dev, int new_mtu)
>
> /* To avoid contending a lock hold by a vcpu who would exit to host, select the
> * txq based on the processor id.
> - * TODO: handle cpu hotplug.
> */
> static u16 virtnet_select_queue(struct net_device *dev, struct sk_buff *skb)
> {
> - int txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) :
> - smp_processor_id();
> + int txq;
> + struct virtnet_info *vi = netdev_priv(dev);
> +
> + if (skb_rx_queue_recorded(skb)) {
> + txq = skb_get_rx_queue(skb);
> + } else {
> + txq = *__this_cpu_ptr(vi->vq_index);
> + if (txq == -1)
> + txq = 0;
> + }
>
> while (unlikely(txq >= dev->real_num_tx_queues))
> txq -= dev->real_num_tx_queues;
> @@ -1453,6 +1477,10 @@ static int virtnet_probe(struct virtio_device *vdev)
> if (vi->stats == NULL)
> goto free;
>
> + vi->vq_index = alloc_percpu(int);
> + if (vi->vq_index == NULL)
> + goto free_stats;
> +
> mutex_init(&vi->config_lock);
> vi->config_enable = true;
> INIT_WORK(&vi->config_work, virtnet_config_changed_work);
> @@ -1476,7 +1504,7 @@ static int virtnet_probe(struct virtio_device *vdev)
> /* Allocate/initialize the rx/tx queues, and invoke find_vqs */
> err = init_vqs(vi);
> if (err)
> - goto free_stats;
> + goto free_index;
>
> netif_set_real_num_tx_queues(dev, 1);
> netif_set_real_num_rx_queues(dev, 1);
> @@ -1520,6 +1548,8 @@ free_recv_bufs:
> free_vqs:
> cancel_delayed_work_sync(&vi->refill);
> virtnet_del_vqs(vi);
> +free_index:
> + free_percpu(vi->vq_index);
> free_stats:
> free_percpu(vi->stats);
> free:
> @@ -1554,6 +1584,7 @@ static void virtnet_remove(struct virtio_device *vdev)
>
> flush_work(&vi->config_work);
>
> + free_percpu(vi->vq_index);
> free_percpu(vi->stats);
> free_netdev(vi->dev);
> }

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/