[PATCH] net: core: fix unbalanced qdisc_run_begin/qdisc_run_end

From: Wen Yang
Date: Sat Nov 09 2019 - 21:02:32 EST


3598 static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
3599 struct net_device *dev,
3600 struct netdev_queue *txq)
3601 {
...
3650 } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
3651 qdisc_run_begin(q)) {

---> Those multiple *and conditions* in this if statement are not
necessarily executed sequentially. If the qdisc_run_begin(q)
statement is executed first and the other conditions are not
satisfied, qdisc_run_end will have no chance to be executed,
and the lowest bit of q->running will always be 1.
This may lead to a softlockup:
https://bugzilla.kernel.org/show_bug.cgi?id=205427
...
3657
3658 qdisc_bstats_update(q, skb);
...
3661 if (unlikely(contended)) {
3662 spin_unlock(&q->busylock);
3663 contended = false;
3664 }
3665 __qdisc_run(q);
3666 }
3667
3668 qdisc_run_end(q);
3669 rc = NET_XMIT_SUCCESS;
3670 }
...

We ensure the correct execution order by explicitly
specifying those dependencies.
Fixes: edb09eb17ed8 ("net: sched: do not acquire qdisc spinlock in qdisc/class stats dump")
Signed-off-by: Wen Yang <wenyang@xxxxxxxxxxxxxxxxx>
Cc: "David S. Miller" <davem@xxxxxxxxxxxxx>
Cc: Eric Dumazet <edumazet@xxxxxxxxxx>
Cc: Cong Wang <xiyou.wangcong@xxxxxxxxx>
Cc: Jamal Hadi Salim <jhs@xxxxxxxxxxxx>
Cc: John Fastabend <john.fastabend@xxxxxxxxx>
Cc: Kevin Athey <kda@xxxxxxxxxx>
Cc: Xiaotian Pei <xiaotian@xxxxxxxxxx>
Cc: netdev@xxxxxxxxxxxxxxx
Cc: bpf@xxxxxxxxxxxxxxx
Cc: linux-kernel@xxxxxxxxxxxxxxx
---
net/core/dev.c | 63 ++++++++++++++++++++++++++++++----------------------------
1 file changed, 33 insertions(+), 30 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 20c7a67..d2690ee 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3602,27 +3602,28 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
spinlock_t *root_lock = qdisc_lock(q);
struct sk_buff *to_free = NULL;
bool contended;
- int rc;
+ int rc = NET_XMIT_SUCCESS;

qdisc_calculate_pkt_len(skb, q);

if (q->flags & TCQ_F_NOLOCK) {
- if ((q->flags & TCQ_F_CAN_BYPASS) && q->empty &&
- qdisc_run_begin(q)) {
- if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED,
- &q->state))) {
- __qdisc_drop(skb, &to_free);
- rc = NET_XMIT_DROP;
- goto end_run;
- }
- qdisc_bstats_cpu_update(q, skb);
+ if ((q->flags & TCQ_F_CAN_BYPASS) && q->empty) {
+ if (qdisc_run_begin(q)) {
+ if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED,
+ &q->state))) {
+ __qdisc_drop(skb, &to_free);
+ rc = NET_XMIT_DROP;
+ goto end_run;
+ }
+ qdisc_bstats_cpu_update(q, skb);

- rc = NET_XMIT_SUCCESS;
- if (sch_direct_xmit(skb, q, dev, txq, NULL, true))
- __qdisc_run(q);
+ if (sch_direct_xmit(skb, q, dev, txq, NULL,
+ true))
+ __qdisc_run(q);

end_run:
- qdisc_run_end(q);
+ qdisc_run_end(q);
+ }
} else {
rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
qdisc_run(q);
@@ -3647,26 +3648,28 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
__qdisc_drop(skb, &to_free);
rc = NET_XMIT_DROP;
- } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
- qdisc_run_begin(q)) {
- /*
- * This is a work-conserving queue; there are no old skbs
- * waiting to be sent out; and the qdisc is not running -
- * xmit the skb directly.
- */
+ } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q)) {
+ if (qdisc_run_begin(q)) {
+ /* This is a work-conserving queue;
+ * there are no old skbs waiting to be sent out;
+ * and the qdisc is not running -
+ * xmit the skb directly.
+ */

- qdisc_bstats_update(q, skb);
+ qdisc_bstats_update(q, skb);

- if (sch_direct_xmit(skb, q, dev, txq, root_lock, true)) {
- if (unlikely(contended)) {
- spin_unlock(&q->busylock);
- contended = false;
+ if (sch_direct_xmit(skb, q, dev, txq, root_lock,
+ true)) {
+ if (unlikely(contended)) {
+ spin_unlock(&q->busylock);
+ contended = false;
+ }
+ __qdisc_run(q);
}
- __qdisc_run(q);
- }

- qdisc_run_end(q);
- rc = NET_XMIT_SUCCESS;
+ qdisc_run_end(q);
+ rc = NET_XMIT_SUCCESS;
+ }
} else {
rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
if (qdisc_run_begin(q)) {
--
1.8.3.1