Re: [PATCH 1/2] NET: Multiple queue network device support

From: Thomas Graf
Date: Fri Mar 09 2007 - 08:40:45 EST


* Kok, Auke <auke-jan.h.kok@xxxxxxxxx> 2007-02-08 16:09
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 455d589..42b635c 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -1477,6 +1477,49 @@ gso:
> skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
> #endif
> if (q->enqueue) {
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> + int queue_index;
> + /* If we're a multi-queue device, get a queue index to lock */
> + if (netif_is_multiqueue(dev))
> + {
> + /* Get the queue index and lock it. */
> + if (likely(q->ops->map_queue)) {
> + queue_index = q->ops->map_queue(skb, q);
> + spin_lock(&dev->egress_subqueue[queue_index].queue_lock);
> + rc = q->enqueue(skb, q);
> + /*
> + * Unlock because the underlying qdisc
> + * may queue and send a packet from a
> + * different queue.
> + */
> + spin_unlock(&dev->egress_subqueue[queue_index].queue_lock);
> + qdisc_run(dev);

I really dislike this asymmetric locking logic, while enqueue() is called
with queue_lock held dequeue() is repsonsible to acquire the lock for
qdisc_restart().

> + rc = rc == NET_XMIT_BYPASS
> + ? NET_XMIT_SUCCESS : rc;
> + goto out;
> + } else {
> + printk(KERN_CRIT "Device %s is "
> + "multiqueue, but map_queue is "
> + "undefined in the qdisc!!\n",
> + dev->name);
> + kfree_skb(skb);

Move this check to tc_modify_qdisc(), it's useless to check this for
every packet being delivered.

> + }
> + } else {
> + /* We're not a multi-queue device. */
> + spin_lock(&dev->queue_lock);
> + q = dev->qdisc;
> + if (q->enqueue) {
> + rc = q->enqueue(skb, q);
> + qdisc_run(dev);
> + spin_unlock(&dev->queue_lock);
> + rc = rc == NET_XMIT_BYPASS
> + ? NET_XMIT_SUCCESS : rc;
> + goto out;
> + }
> + spin_unlock(&dev->queue_lock);

Please don't duplicate already existing code.

> @@ -130,6 +140,72 @@ static inline int qdisc_restart(struct net_device *dev)
> }
>
> {
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> + if (netif_is_multiqueue(dev)) {
> + if (likely(q->ops->map_queue)) {
> + queue_index = q->ops->map_queue(skb, q);
> + } else {
> + printk(KERN_CRIT "Device %s is "
> + "multiqueue, but map_queue is "
> + "undefined in the qdisc!!\n",
> + dev->name);
> + goto requeue;
> + }

Yet another condition completely useless for every transmission.

> + spin_unlock(&dev->egress_subqueue[queue_index].queue_lock);
> + /* Check top level device, and any sub-device */
> + if ((!netif_queue_stopped(dev)) &&
> + (!netif_subqueue_stopped(dev, queue_index))) {
> + int ret;
> + ret = dev->hard_start_subqueue_xmit(skb, dev, queue_index);
> + if (ret == NETDEV_TX_OK) {
> + if (!nolock) {
> + netif_tx_unlock(dev);
> + }
> + return -1;
> + }
> + if (ret == NETDEV_TX_LOCKED && nolock) {
> + spin_lock(&dev->egress_subqueue[queue_index].queue_lock);
> + goto collision;
> + }
> + }
> + /* NETDEV_TX_BUSY - we need to requeue */
> + /* Release the driver */
> + if (!nolock) {
> + netif_tx_unlock(dev);
> + }
> + spin_lock(&dev->egress_subqueue[queue_index].queue_lock);
> + q = dev->qdisc;

This is identical to the existing logic except for the different lock,
the additional to check subqueue state and a different hard_start_xmit
call. Share the logic.

> + }
> + else
> + {
> + /* We're a single-queue device */
> + /* And release queue */
> + spin_unlock(&dev->queue_lock);
> + if (!netif_queue_stopped(dev)) {
> + int ret;
> +
> + ret = dev->hard_start_xmit(skb, dev);
> + if (ret == NETDEV_TX_OK) {
> + if (!nolock) {
> + netif_tx_unlock(dev);
> + }
> + spin_lock(&dev->queue_lock);
> + return -1;
> + }
> + if (ret == NETDEV_TX_LOCKED && nolock) {
> + spin_lock(&dev->queue_lock);
> + goto collision;
> + }
> + }
> + /* NETDEV_TX_BUSY - we need to requeue */
> + /* Release the driver */
> + if (!nolock) {
> + netif_tx_unlock(dev);
> + }
> + spin_lock(&dev->queue_lock);
> + q = dev->qdisc;

Again, you just copied the existing code into a separate branch, fix the
branching logic!

> @@ -356,10 +454,18 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
> struct sk_buff_head *list = qdisc_priv(qdisc);
>
> for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> + if (netif_is_multiqueue(qdisc->dev))
> + spin_lock(&qdisc->dev->egress_subqueue[0].queue_lock);
> +#endif
> if (!skb_queue_empty(list + prio)) {
> qdisc->q.qlen--;
> return __qdisc_dequeue_head(qdisc, list + prio);
> }
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> + if (netif_is_multiqueue(qdisc->dev))
> + spin_unlock(&qdisc->dev->egress_subqueue[0].queue_lock);
> +#endif

This lock/unlock for every band definitely screws performance.

> }
>
> return NULL;
> @@ -141,18 +174,53 @@ prio_dequeue(struct Qdisc* sch)
> struct sk_buff *skb;
> struct prio_sched_data *q = qdisc_priv(sch);
> int prio;
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> + int queue;
> +#endif
> struct Qdisc *qdisc;
>
> + /*
> + * If we're multiqueue, the basic approach is try the lock on each
> + * queue. If it's locked, either we're already dequeuing, or enqueue
> + * is doing something. Go to the next band if we're locked. Once we
> + * have a packet, unlock the queue. NOTE: the underlying qdisc CANNOT
> + * be a PRIO qdisc, otherwise we will deadlock. FIXME
> + */
> for (prio = 0; prio < q->bands; prio++) {
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> + if (netif_is_multiqueue(sch->dev)) {
> + queue = q->band2queue[prio];
> + if (spin_trylock(&sch->dev->egress_subqueue[queue].queue_lock)) {
> + qdisc = q->queues[prio];
> + skb = qdisc->dequeue(qdisc);
> + if (skb) {
> + sch->q.qlen--;
> + skb->priority = prio;
> + spin_unlock(&sch->dev->egress_subqueue[queue].queue_lock);
> + return skb;
> + }
> + spin_unlock(&sch->dev->egress_subqueue[queue].queue_lock);
> + }

Your modified qdisc_restart() expects the queue_lock to be locked, how
can this work?

> + } else {
> + qdisc = q->queues[prio];
> + skb = qdisc->dequeue(qdisc);
> + if (skb) {
> + sch->q.qlen--;
> + skb->priority = prio;
> + return skb;
> + }
> + }
> +#else
> qdisc = q->queues[prio];
> skb = qdisc->dequeue(qdisc);
> if (skb) {
> sch->q.qlen--;
> + skb->priority = prio;
> return skb;
> }
> +#endif
> }
> return NULL;
> -
> }
>
> static unsigned int prio_drop(struct Qdisc* sch)
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/