RE: [PATCH 1/2] NET: Multiple queue network device support

From: Waskiewicz Jr, Peter P
Date: Fri Feb 23 2007 - 14:06:20 EST


map_queue will always return 0 in the pfifo_fast Qdisc. This is because
pfifo_fast is the default scheduler when a device is brought up. I
didn't want to make any assumptions about a device's functionality, so
using queue 0 seems the least dangerous.

However, in the prio qdisc, there is a small problem with the queue to
band assignment logic. If you have the same number of bands as queues
(e.g. 4 prio bands and 4 Tx queues), then you will have all bands
assigned to queue 0. I have a fix for that, which I plan to send when I
finish up a documentation piece for this patch's repost.

Thanks for the feedback,

PJ Waskiewicz

> -----Original Message-----
> From: Sreenivasa Honnur [mailto:Sreenivasa.Honnur@xxxxxxxxxxxx]
> Sent: Friday, February 23, 2007 1:01 AM
> To: Kok, Auke-jan H; David Miller; Garzik, Jeff;
> netdev@xxxxxxxxxxxxxxx; linux-kernel@xxxxxxxxxxxxxxx
> Cc: Waskiewicz Jr, Peter P; Brandeburg, Jesse; Kok, Auke;
> Ronciak, John
> Subject: RE: [PATCH 1/2] NET: Multiple queue network device support
>
> Function "map_queue" returns queue index as '0'. There is no
> support to return different queue indexes.
>
> -----Original Message-----
> From: netdev-owner@xxxxxxxxxxxxxxx
> [mailto:netdev-owner@xxxxxxxxxxxxxxx]
> On Behalf Of Kok, Auke
> Sent: Friday, February 09, 2007 5:40 AM
> To: David Miller; Garzik, Jeff; netdev@xxxxxxxxxxxxxxx;
> linux-kernel@xxxxxxxxxxxxxxx
> Cc: Kok, Auke; Peter Waskiewicz Jr; Brandeburg, Jesse; Kok,
> Auke; Ronciak, John
> Subject: [PATCH 1/2] NET: Multiple queue network device support
>
>
> From: Peter Waskiewicz Jr <peter.p.waskiewicz.jr@xxxxxxxxx>
>
> Added an API and associated supporting routines for
> multiqueue network devices. This allows network devices
> supporting multiple TX queues to configure each queue within
> the netdevice and manage each queue independently. Changes
> to the PRIO Qdisc also allow a user to map multiple flows to
> individual TX queues, taking advantage of each queue on the device.
>
> Signed-off-by: Peter Waskiewicz Jr <peter.p.waskiewicz.jr@xxxxxxxxx>
> Signed-off-by: Auke Kok <auke-jan.h.kok@xxxxxxxxx>
> ---
>
> include/linux/netdevice.h | 73 ++++++++++++++++++++++++++++
> include/net/sch_generic.h | 3 +
> net/Kconfig | 20 ++++++++
> net/core/dev.c | 51 ++++++++++++++++++++
> net/sched/sch_generic.c | 117
> +++++++++++++++++++++++++++++++++++++++++++++
> net/sched/sch_prio.c | 106
> ++++++++++++++++++++++++++++++++++++++---
> 6 files changed, 364 insertions(+), 6 deletions(-)
>
> diff --git a/include/linux/netdevice.h
> b/include/linux/netdevice.h index
> 2e37f50..c7f94a8 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -106,6 +106,16 @@ struct netpoll_info; #define MAX_HEADER
> (LL_MAX_HEADER + 48) #endif
>
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> +
> +struct net_device_subqueue
> +{
> + /* Give a lock and a flow control state for each queue */
> + unsigned long state;
> + spinlock_t queue_lock ____cacheline_aligned_in_smp;
> +};
> +#endif
> +
> /*
> * Network device statistics. Akin to the 2.0 ether stats but
> * with byte counters.
> @@ -339,6 +349,10 @@ struct net_device
> #define NETIF_F_GEN_CSUM (NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)
> #define NETIF_F_ALL_CSUM (NETIF_F_IP_CSUM | NETIF_F_GEN_CSUM)
>
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> +#define NETIF_F_MULTI_QUEUE 8192 /* Has multiple TX/RX queues */
> +#endif
> +
> struct net_device *next_sched;
>
> /* Interface index. Unique device identifier */
> @@ -532,6 +546,19 @@ struct net_device
> struct device dev;
> /* space for optional statistics and wireless sysfs groups */
> struct attribute_group *sysfs_groups[3];
> +
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> + /* To retrieve statistics per subqueue - FOR FUTURE USE */
> + struct net_device_stats* (*get_subqueue_stats)(struct net_device
> *dev,
> + int
> queue_index);
> +
> + /* The TX queue control structures */
> + struct net_device_subqueue *egress_subqueue;
> + int egress_subqueue_count;
> + int (*hard_start_subqueue_xmit)(struct
> sk_buff *skb,
> + struct
> net_device *dev,
> + int
> queue_index);
> +#endif /* CONFIG_NET_MULTI_QUEUE_DEVICE */
> };
> #define to_net_dev(d) container_of(d, struct net_device, dev)
>
> @@ -673,6 +700,52 @@ static inline int netif_running(const
> struct net_device *dev)
> return test_bit(__LINK_STATE_START, &dev->state); }
>
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> +
> +/*
> + * Routines to manage the subqueues on a device. We only need start,
> + * stop, and a check if it's stopped. All other device management is
> + * done at the overall netdevice level.
> + * Also test the netif state if we're multi-queue at all.
> + */
> +static inline void netif_start_subqueue(struct net_device *dev, int
> +queue_index) {
> + clear_bit(__LINK_STATE_XOFF,
> +&dev->egress_subqueue[queue_index].state);
> +}
> +
> +static inline void netif_stop_subqueue(struct net_device *dev, int
> +queue_index) { #ifdef CONFIG_NETPOLL_TRAP
> + if (netpoll_trap())
> + return;
> +#endif
> + set_bit(__LINK_STATE_XOFF,
> &dev->egress_subqueue[queue_index].state);
> +}
> +
> +static inline int netif_subqueue_stopped(const struct
> net_device *dev,
> + int queue_index) {
> + return test_bit(__LINK_STATE_XOFF,
> + &dev->egress_subqueue[queue_index].state);
> +}
> +
> +static inline void netif_wake_subqueue(struct net_device *dev, int
> +queue_index) { #ifdef CONFIG_NETPOLL_TRAP
> + if (netpoll_trap())
> + return;
> +#endif
> + if (test_and_clear_bit(__LINK_STATE_XOFF,
> +
> &dev->egress_subqueue[queue_index].state))
> + __netif_schedule(dev);
> +}
> +
> +static inline int netif_is_multiqueue(const struct net_device *dev) {
> + return (!!(NETIF_F_MULTI_QUEUE & dev->features)); } #endif /*
> +CONFIG_NET_MULTI_QUEUE_DEVICE */
> +
>
> /* Use this variant when it is known for sure that it
> * is executing from interrupt context.
> diff --git a/include/net/sch_generic.h
> b/include/net/sch_generic.h index
> 8208639..8751ba6 100644
> --- a/include/net/sch_generic.h
> +++ b/include/net/sch_generic.h
> @@ -102,6 +102,9 @@ struct Qdisc_ops
>
> int (*dump)(struct Qdisc *, struct sk_buff
> *);
> int (*dump_stats)(struct Qdisc *, struct
> gnet_dump *);
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> + int (*map_queue)(struct sk_buff *, struct
> Qdisc *);
> +#endif
>
> struct module *owner;
> };
> diff --git a/net/Kconfig b/net/Kconfig
> index 7dfc949..796edb3 100644
> --- a/net/Kconfig
> +++ b/net/Kconfig
> @@ -38,6 +38,26 @@ source "net/packet/Kconfig"
> source "net/unix/Kconfig"
> source "net/xfrm/Kconfig"
>
> +config NET_MULTI_QUEUE_DEVICE
> + bool "Multiple queue network device support (EXPERIMENTAL)"
> + depends on NET_SCHED && EXPERIMENTAL
> + help
> + Saying Y here will add support for network devices that have
> more than
> + one physical TX queue and/or RX queue.
> +
> + Multiple queue devices will require qdiscs that understand how
> to
> + queue to multiple targets. The default packet scheduler will
> default
> + to the first queue in a device. In other words, if you need
> the
> + ability to spread traffic across queues, your queueing
> discipline
> + needs to know how to do that.
> +
> + Note that saying Y here will give preferential treatment to
> multiple
> + queue devices in the network stack. A slight drop in
> single-queue
> + device performance may be seen.
> +
> + Say N here unless you know your network device supports
> multiple
> + TX and/or RX queues.
> +
> config INET
> bool "TCP/IP networking"
> ---help---
> diff --git a/net/core/dev.c b/net/core/dev.c index 455d589..42b635c
> 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -1477,6 +1477,49 @@ gso:
> skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
> #endif
> if (q->enqueue) {
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> + int queue_index;
> + /* If we're a multi-queue device, get a queue index to
> lock */
> + if (netif_is_multiqueue(dev))
> + {
> + /* Get the queue index and lock it. */
> + if (likely(q->ops->map_queue)) {
> + queue_index = q->ops->map_queue(skb, q);
> +
> spin_lock(&dev->egress_subqueue[queue_index].queue_lock);
> + rc = q->enqueue(skb, q);
> + /*
> + * Unlock because the underlying qdisc
> + * may queue and send a packet from a
> + * different queue.
> + */
> +
> spin_unlock(&dev->egress_subqueue[queue_index].queue_lock);
> + qdisc_run(dev);
> + rc = rc == NET_XMIT_BYPASS
> + ? NET_XMIT_SUCCESS : rc;
> + goto out;
> + } else {
> + printk(KERN_CRIT "Device %s is "
> + "multiqueue, but map_queue is "
> + "undefined in the qdisc!!\n",
> + dev->name);
> + kfree_skb(skb);
> + }
> + } else {
> + /* We're not a multi-queue device. */
> + spin_lock(&dev->queue_lock);
> + q = dev->qdisc;
> + if (q->enqueue) {
> + rc = q->enqueue(skb, q);
> + qdisc_run(dev);
> + spin_unlock(&dev->queue_lock);
> + rc = rc == NET_XMIT_BYPASS
> + ? NET_XMIT_SUCCESS : rc;
> + goto out;
> + }
> + spin_unlock(&dev->queue_lock);
> + }
> +#else /* No multi-queue support compiled in */
> +
> /* Grab device queue */
> spin_lock(&dev->queue_lock);
> q = dev->qdisc;
> @@ -1489,6 +1532,7 @@ gso:
> goto out;
> }
> spin_unlock(&dev->queue_lock);
> +#endif /* CONFIG_NET_MULTI_QUEUE_DEVICE */
> }
>
> /* The device has no queue. Common case for software devices:
> @@ -2879,6 +2923,9 @@ int register_netdevice(struct net_device *dev)
> struct hlist_head *head;
> struct hlist_node *p;
> int ret;
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> + int i;
> +#endif
>
> BUG_ON(dev_boot_phase);
> ASSERT_RTNL();
> @@ -2894,6 +2941,10 @@ int register_netdevice(struct
> net_device *dev) #ifdef CONFIG_NET_CLS_ACT
> spin_lock_init(&dev->ingress_lock);
> #endif
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> + for (i = 0; i < dev->egress_subqueue_count; i++)
> + spin_lock_init(&dev->egress_subqueue[i].queue_lock);
> +#endif
>
> dev->iflink = -1;
>
> diff --git a/net/sched/sch_generic.c
> b/net/sched/sch_generic.c index bc116bd..62ca23e 100644
> --- a/net/sched/sch_generic.c
> +++ b/net/sched/sch_generic.c
> @@ -42,6 +42,7 @@
> to data, participating in scheduling must be additionally
> protected with dev->queue_lock spinlock.
>
> + For single-queue devices:
> The idea is the following:
> - enqueue, dequeue are serialized via top level device
> spinlock dev->queue_lock.
> @@ -51,6 +52,12 @@
> hence this lock may be made without local bh disabling.
>
> qdisc_tree_lock must be grabbed BEFORE dev->queue_lock!
> +
> + For multi-queue devices:
> + - enqueue, dequeue are serialized with individual queue locks,
> + dev->egress_subqueue[queue_index].queue_lock.
> + - Everything else with tree protection with single queue devices
> apply
> + to multiqueue devices.
> */
> DEFINE_RWLOCK(qdisc_tree_lock);
>
> @@ -92,6 +99,9 @@ static inline int qdisc_restart(struct net_device
> *dev) {
> struct Qdisc *q = dev->qdisc;
> struct sk_buff *skb;
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> + int queue_index = 0;
> +#endif
>
> /* Dequeue packet */
> if (((skb = dev->gso_skb)) || ((skb = q->dequeue(q)))) { @@
> -130,6 +140,72 @@ static inline int qdisc_restart(struct net_device
> *dev)
> }
>
> {
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> + if (netif_is_multiqueue(dev)) {
> + if (likely(q->ops->map_queue)) {
> + queue_index =
> q->ops->map_queue(skb, q);
> + } else {
> + printk(KERN_CRIT "Device %s is "
> + "multiqueue, but
> map_queue is "
> + "undefined in the
> qdisc!!\n",
> + dev->name);
> + goto requeue;
> + }
> +
> spin_unlock(&dev->egress_subqueue[queue_index].queue_lock);
> + /* Check top level device, and any
> sub-device */
> + if ((!netif_queue_stopped(dev)) &&
> + (!netif_subqueue_stopped(dev,
> queue_index))) {
> + int ret;
> + ret =
> dev->hard_start_subqueue_xmit(skb, dev, queue_index);
> + if (ret == NETDEV_TX_OK) {
> + if (!nolock) {
> +
> netif_tx_unlock(dev);
> + }
> + return -1;
> + }
> + if (ret == NETDEV_TX_LOCKED &&
> nolock) {
> +
> spin_lock(&dev->egress_subqueue[queue_index].queue_lock);
> + goto collision;
> + }
> + }
> + /* NETDEV_TX_BUSY - we need to requeue
> */
> + /* Release the driver */
> + if (!nolock) {
> + netif_tx_unlock(dev);
> + }
> +
> spin_lock(&dev->egress_subqueue[queue_index].queue_lock);
> + q = dev->qdisc;
> + }
> + else
> + {
> + /* We're a single-queue device */
> + /* And release queue */
> + spin_unlock(&dev->queue_lock);
> + if (!netif_queue_stopped(dev)) {
> + int ret;
> +
> + ret = dev->hard_start_xmit(skb,
> dev);
> + if (ret == NETDEV_TX_OK) {
> + if (!nolock) {
> +
> netif_tx_unlock(dev);
> + }
> +
> spin_lock(&dev->queue_lock);
> + return -1;
> + }
> + if (ret == NETDEV_TX_LOCKED &&
> nolock) {
> +
> spin_lock(&dev->queue_lock);
> + goto collision;
> + }
> + }
> + /* NETDEV_TX_BUSY - we need to requeue
> */
> + /* Release the driver */
> + if (!nolock) {
> + netif_tx_unlock(dev);
> + }
> + spin_lock(&dev->queue_lock);
> + q = dev->qdisc;
> + }
> +#else /* CONFIG_NET_MULTI_QUEUE_DEVICE */
> /* And release queue */
> spin_unlock(&dev->queue_lock);
>
> @@ -157,6 +233,7 @@ static inline int qdisc_restart(struct net_device
> *dev)
> }
> spin_lock(&dev->queue_lock);
> q = dev->qdisc;
> +#endif /* CONFIG_NET_MULTI_QUEUE_DEVICE */
> }
>
> /* Device kicked us out :(
> @@ -175,9 +252,17 @@ requeue:
> else
> q->ops->requeue(skb, q);
> netif_schedule(dev);
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> + if (netif_is_multiqueue(dev))
> +
> spin_unlock(&dev->egress_subqueue[queue_index].queue_lock);
> +#endif
> return 1;
> }
> BUG_ON((int) q->q.qlen < 0);
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> + if (netif_is_multiqueue(dev))
> +
> spin_unlock(&dev->egress_subqueue[queue_index].queue_lock);
> +#endif
> return q->q.qlen;
> }
>
> @@ -287,12 +372,22 @@ static int noop_requeue(struct sk_buff
> *skb, struct Qdisc* qdisc)
> return NET_XMIT_CN;
> }
>
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> +static int noop_map_queue(struct sk_buff *skb, struct Qdisc *qdisc) {
> + return 0;
> +}
> +#endif
> +
> struct Qdisc_ops noop_qdisc_ops = {
> .id = "noop",
> .priv_size = 0,
> .enqueue = noop_enqueue,
> .dequeue = noop_dequeue,
> .requeue = noop_requeue,
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> + .map_queue = noop_map_queue,
> +#endif
> .owner = THIS_MODULE,
> };
>
> @@ -310,6 +405,9 @@ static struct Qdisc_ops noqueue_qdisc_ops = {
> .enqueue = noop_enqueue,
> .dequeue = noop_dequeue,
> .requeue = noop_requeue,
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> + .map_queue = noop_map_queue,
> +#endif
> .owner = THIS_MODULE,
> };
>
> @@ -356,10 +454,18 @@ static struct sk_buff *pfifo_fast_dequeue(struct
> Qdisc* qdisc)
> struct sk_buff_head *list = qdisc_priv(qdisc);
>
> for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> + if (netif_is_multiqueue(qdisc->dev))
> +
> spin_lock(&qdisc->dev->egress_subqueue[0].queue_lock);
> +#endif
> if (!skb_queue_empty(list + prio)) {
> qdisc->q.qlen--;
> return __qdisc_dequeue_head(qdisc, list + prio);
> }
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> + if (netif_is_multiqueue(qdisc->dev))
> +
> spin_unlock(&qdisc->dev->egress_subqueue[0].queue_lock);
> +#endif
> }
>
> return NULL;
> @@ -406,6 +512,14 @@ static int pfifo_fast_init(struct Qdisc
> *qdisc, struct rtattr *opt)
> return 0;
> }
>
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> +static int pfifo_fast_map_queue(struct sk_buff *skb, struct Qdisc
> +*qdisc) {
> + /* Generic qdisc, so always return queue index 0. */
> + return 0;
> +}
> +#endif
> +
> static struct Qdisc_ops pfifo_fast_ops = {
> .id = "pfifo_fast",
> .priv_size = PFIFO_FAST_BANDS * sizeof(struct
> sk_buff_head),
> @@ -415,6 +529,9 @@ static struct Qdisc_ops pfifo_fast_ops = {
> .init = pfifo_fast_init,
> .reset = pfifo_fast_reset,
> .dump = pfifo_fast_dump,
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> + .map_queue = pfifo_fast_map_queue,
> +#endif
> .owner = THIS_MODULE,
> };
>
> diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index
> 2567b4c..0e24071 100644
> --- a/net/sched/sch_prio.c
> +++ b/net/sched/sch_prio.c
> @@ -43,16 +43,19 @@ struct prio_sched_data
> struct tcf_proto *filter_list;
> u8 prio2band[TC_PRIO_MAX+1];
> struct Qdisc *queues[TCQ_PRIO_BANDS];
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> + int band2queue[TC_PRIO_MAX + 1];
> +#endif
> };
>
> -
> static struct Qdisc *
> -prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
> +prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr, int
> +*retband)
> {
> struct prio_sched_data *q = qdisc_priv(sch);
> u32 band = skb->priority;
> struct tcf_result res;
>
> + *retband = band;
> *qerr = NET_XMIT_BYPASS;
> if (TC_H_MAJ(skb->priority) != sch->handle) { #ifdef
> CONFIG_NET_CLS_ACT @@ -68,13 +71,16 @@ prio_classify(struct
> sk_buff *skb, struct Qdisc *sch, int *qerr) #else
> if (!q->filter_list || tc_classify(skb, q->filter_list,
> &res)) { #endif
> - if (TC_H_MAJ(band))
> + if (TC_H_MAJ(band)) {
> band = 0;
> + *retband = 0;
> + }
> return
> q->queues[q->prio2band[band&TC_PRIO_MAX]];
> }
> band = res.classid;
> }
> band = TC_H_MIN(band) - 1;
> + *retband = band;
> if (band > q->bands)
> return q->queues[q->prio2band[0]];
>
> @@ -86,8 +92,15 @@ prio_enqueue(struct sk_buff *skb, struct
> Qdisc *sch) {
> struct Qdisc *qdisc;
> int ret;
> + int band; /* Unused in this function, just here for multi-queue
> */
> +
> + /*
> + * NOTE: if a device is multiqueue, then it is imperative the
> + * underlying qdisc NOT be another PRIO qdisc. Otherwise we're
> going
> + * to deadlock. Fixme please.
> + */
> + qdisc = prio_classify(skb, sch, &ret, &band);
>
> - qdisc = prio_classify(skb, sch, &ret);
> #ifdef CONFIG_NET_CLS_ACT
> if (qdisc == NULL) {
>
> @@ -108,14 +121,34 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc
> *sch)
> return ret;
> }
>
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> +static int
> +prio_map_queue(struct sk_buff *skb, struct Qdisc *sch) {
> + /*
> + * We'll classify the skb here, so return the Qdisc back through
> + * the function call. The return value is the queue we need to
> + * lock, which will be the value returned through band. This
> will help
> + * speed up enqueue, since it won't have to do the classify
> again.
> + */
> + int ret;
> + int band;
> + struct prio_sched_data *q = qdisc_priv(sch);
> +
> + sch = prio_classify(skb, sch, &ret, &band);
> + return q->band2queue[band];
> +}
> +#endif
>
> static int
> prio_requeue(struct sk_buff *skb, struct Qdisc* sch) {
> struct Qdisc *qdisc;
> int ret;
> + int band;
> +
> + qdisc = prio_classify(skb, sch, &ret, &band);
>
> - qdisc = prio_classify(skb, sch, &ret);
> #ifdef CONFIG_NET_CLS_ACT
> if (qdisc == NULL) {
> if (ret == NET_XMIT_BYPASS)
> @@ -141,18 +174,53 @@ prio_dequeue(struct Qdisc* sch)
> struct sk_buff *skb;
> struct prio_sched_data *q = qdisc_priv(sch);
> int prio;
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> + int queue;
> +#endif
> struct Qdisc *qdisc;
>
> + /*
> + * If we're multiqueue, the basic approach is try the lock on
> each
> + * queue. If it's locked, either we're already dequeuing, or
> enqueue
> + * is doing something. Go to the next band if we're locked.
> Once we
> + * have a packet, unlock the queue. NOTE: the underlying qdisc
> CANNOT
> + * be a PRIO qdisc, otherwise we will deadlock. FIXME
> + */
> for (prio = 0; prio < q->bands; prio++) {
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> + if (netif_is_multiqueue(sch->dev)) {
> + queue = q->band2queue[prio];
> + if
> (spin_trylock(&sch->dev->egress_subqueue[queue].queue_lock)) {
> + qdisc = q->queues[prio];
> + skb = qdisc->dequeue(qdisc);
> + if (skb) {
> + sch->q.qlen--;
> + skb->priority = prio;
> +
> spin_unlock(&sch->dev->egress_subqueue[queue].queue_lock);
> + return skb;
> + }
> +
> spin_unlock(&sch->dev->egress_subqueue[queue].queue_lock);
> + }
> + } else {
> + qdisc = q->queues[prio];
> + skb = qdisc->dequeue(qdisc);
> + if (skb) {
> + sch->q.qlen--;
> + skb->priority = prio;
> + return skb;
> + }
> + }
> +#else
> qdisc = q->queues[prio];
> skb = qdisc->dequeue(qdisc);
> if (skb) {
> sch->q.qlen--;
> + skb->priority = prio;
> return skb;
> }
> +#endif
> }
> return NULL;
> -
> }
>
> static unsigned int prio_drop(struct Qdisc* sch) @@ -205,6
> +273,10 @@ static int prio_tune(struct Qdisc *sch, struct rtattr *opt)
> struct prio_sched_data *q = qdisc_priv(sch);
> struct tc_prio_qopt *qopt = RTA_DATA(opt);
> int i;
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> + int queue;
> + int qmapoffset;
> +#endif
>
> if (opt->rta_len < RTA_LENGTH(sizeof(*qopt)))
> return -EINVAL;
> @@ -247,6 +319,25 @@ static int prio_tune(struct Qdisc *sch,
> struct rtattr *opt)
> }
> }
> }
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> + /* setup queue to band mapping */
> + if (netif_is_multiqueue(sch->dev)) {
> + if (q->bands == sch->dev->egress_subqueue_count)
> + qmapoffset = 0;
> + else
> + qmapoffset = q->bands /
> sch->dev->egress_subqueue_count;
> +
> + queue = 0;
> + for (i = 0; i < q->bands; i++) {
> + q->band2queue[i] = queue;
> + if ( (((i + 1) - queue) == qmapoffset) &&
> + (queue < (sch->dev->egress_subqueue_count -
> 1))) {
> + queue++;
> + }
> + }
> + }
> +
> +#endif
> return 0;
> }
>
> @@ -430,6 +521,9 @@ static struct Qdisc_ops prio_qdisc_ops = {
> .destroy = prio_destroy,
> .change = prio_tune,
> .dump = prio_dump,
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> + .map_queue = prio_map_queue,
> +#endif
> .owner = THIS_MODULE,
> };
>
>
>
>
> ---
> Auke Kok <auke-jan.h.kok@xxxxxxxxx>
> -
> To unsubscribe from this list: send the line "unsubscribe
> netdev" in the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/