[PATCH 4.0 3/7] bnx2x: Fix busy_poll vs netpoll

From: Greg Kroah-Hartman
Date: Sun Apr 26 2015 - 09:41:13 EST


4.0-stable review patch. If anyone has any objections, please let me know.

------------------

From: Eric Dumazet <edumazet@xxxxxxxxxx>

[ Upstream commit 074975d0374333f656c48487aa046a21a9b9d7a1 ]

Commit 9a2620c877454 ("bnx2x: prevent WARN during driver unload")
switched the napi/busy_lock locking mechanism from spin_lock() into
spin_lock_bh(), breaking inter-operability with netconsole, as netpoll
disables interrupts prior to calling our napi mechanism.

This switches the driver into using atomic assignments instead of the
spinlock mechanisms previously employed.

Based on initial patch from Yuval Mintz & Ariel Elior

I basically added softirq starvation avoidance, and mixture
of atomic operations, plain writes and barriers.

Note this slightly reduces the overhead for this driver when no
busy_poll sockets are in use.

Fixes: 9a2620c877454 ("bnx2x: prevent WARN during driver unload")
Signed-off-by: Eric Dumazet <edumazet@xxxxxxxxxx>
Signed-off-by: David S. Miller <davem@xxxxxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
---
drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | 135 ++++++++----------------
drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 9 -
2 files changed, 55 insertions(+), 89 deletions(-)

--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -531,20 +531,8 @@ struct bnx2x_fastpath {
struct napi_struct napi;

#ifdef CONFIG_NET_RX_BUSY_POLL
- unsigned int state;
-#define BNX2X_FP_STATE_IDLE 0
-#define BNX2X_FP_STATE_NAPI (1 << 0) /* NAPI owns this FP */
-#define BNX2X_FP_STATE_POLL (1 << 1) /* poll owns this FP */
-#define BNX2X_FP_STATE_DISABLED (1 << 2)
-#define BNX2X_FP_STATE_NAPI_YIELD (1 << 3) /* NAPI yielded this FP */
-#define BNX2X_FP_STATE_POLL_YIELD (1 << 4) /* poll yielded this FP */
-#define BNX2X_FP_OWNED (BNX2X_FP_STATE_NAPI | BNX2X_FP_STATE_POLL)
-#define BNX2X_FP_YIELD (BNX2X_FP_STATE_NAPI_YIELD | BNX2X_FP_STATE_POLL_YIELD)
-#define BNX2X_FP_LOCKED (BNX2X_FP_OWNED | BNX2X_FP_STATE_DISABLED)
-#define BNX2X_FP_USER_PEND (BNX2X_FP_STATE_POLL | BNX2X_FP_STATE_POLL_YIELD)
- /* protect state */
- spinlock_t lock;
-#endif /* CONFIG_NET_RX_BUSY_POLL */
+ unsigned long busy_poll_state;
+#endif

union host_hc_status_block status_blk;
/* chip independent shortcuts into sb structure */
@@ -619,104 +607,83 @@ struct bnx2x_fastpath {
#define bnx2x_fp_qstats(bp, fp) (&((bp)->fp_stats[(fp)->index].eth_q_stats))

#ifdef CONFIG_NET_RX_BUSY_POLL
-static inline void bnx2x_fp_init_lock(struct bnx2x_fastpath *fp)
+
+enum bnx2x_fp_state {
+ BNX2X_STATE_FP_NAPI = BIT(0), /* NAPI handler owns the queue */
+
+ BNX2X_STATE_FP_NAPI_REQ_BIT = 1, /* NAPI would like to own the queue */
+ BNX2X_STATE_FP_NAPI_REQ = BIT(1),
+
+ BNX2X_STATE_FP_POLL_BIT = 2,
+ BNX2X_STATE_FP_POLL = BIT(2), /* busy_poll owns the queue */
+
+ BNX2X_STATE_FP_DISABLE_BIT = 3, /* queue is dismantled */
+};
+
+static inline void bnx2x_fp_busy_poll_init(struct bnx2x_fastpath *fp)
{
- spin_lock_init(&fp->lock);
- fp->state = BNX2X_FP_STATE_IDLE;
+ WRITE_ONCE(fp->busy_poll_state, 0);
}

/* called from the device poll routine to get ownership of a FP */
static inline bool bnx2x_fp_lock_napi(struct bnx2x_fastpath *fp)
{
- bool rc = true;
+ unsigned long prev, old = READ_ONCE(fp->busy_poll_state);

- spin_lock_bh(&fp->lock);
- if (fp->state & BNX2X_FP_LOCKED) {
- WARN_ON(fp->state & BNX2X_FP_STATE_NAPI);
- fp->state |= BNX2X_FP_STATE_NAPI_YIELD;
- rc = false;
- } else {
- /* we don't care if someone yielded */
- fp->state = BNX2X_FP_STATE_NAPI;
+ while (1) {
+ switch (old) {
+ case BNX2X_STATE_FP_POLL:
+ /* make sure bnx2x_fp_lock_poll() wont starve us */
+ set_bit(BNX2X_STATE_FP_NAPI_REQ_BIT,
+ &fp->busy_poll_state);
+ /* fallthrough */
+ case BNX2X_STATE_FP_POLL | BNX2X_STATE_FP_NAPI_REQ:
+ return false;
+ default:
+ break;
+ }
+ prev = cmpxchg(&fp->busy_poll_state, old, BNX2X_STATE_FP_NAPI);
+ if (unlikely(prev != old)) {
+ old = prev;
+ continue;
+ }
+ return true;
}
- spin_unlock_bh(&fp->lock);
- return rc;
}

-/* returns true is someone tried to get the FP while napi had it */
-static inline bool bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp)
+static inline void bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp)
{
- bool rc = false;
-
- spin_lock_bh(&fp->lock);
- WARN_ON(fp->state &
- (BNX2X_FP_STATE_POLL | BNX2X_FP_STATE_NAPI_YIELD));
-
- if (fp->state & BNX2X_FP_STATE_POLL_YIELD)
- rc = true;
-
- /* state ==> idle, unless currently disabled */
- fp->state &= BNX2X_FP_STATE_DISABLED;
- spin_unlock_bh(&fp->lock);
- return rc;
+ smp_wmb();
+ fp->busy_poll_state = 0;
}

/* called from bnx2x_low_latency_poll() */
static inline bool bnx2x_fp_lock_poll(struct bnx2x_fastpath *fp)
{
- bool rc = true;
-
- spin_lock_bh(&fp->lock);
- if ((fp->state & BNX2X_FP_LOCKED)) {
- fp->state |= BNX2X_FP_STATE_POLL_YIELD;
- rc = false;
- } else {
- /* preserve yield marks */
- fp->state |= BNX2X_FP_STATE_POLL;
- }
- spin_unlock_bh(&fp->lock);
- return rc;
+ return cmpxchg(&fp->busy_poll_state, 0, BNX2X_STATE_FP_POLL) == 0;
}

-/* returns true if someone tried to get the FP while it was locked */
-static inline bool bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp)
+static inline void bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp)
{
- bool rc = false;
-
- spin_lock_bh(&fp->lock);
- WARN_ON(fp->state & BNX2X_FP_STATE_NAPI);
-
- if (fp->state & BNX2X_FP_STATE_POLL_YIELD)
- rc = true;
-
- /* state ==> idle, unless currently disabled */
- fp->state &= BNX2X_FP_STATE_DISABLED;
- spin_unlock_bh(&fp->lock);
- return rc;
+ smp_mb__before_atomic();
+ clear_bit(BNX2X_STATE_FP_POLL_BIT, &fp->busy_poll_state);
}

-/* true if a socket is polling, even if it did not get the lock */
+/* true if a socket is polling */
static inline bool bnx2x_fp_ll_polling(struct bnx2x_fastpath *fp)
{
- WARN_ON(!(fp->state & BNX2X_FP_OWNED));
- return fp->state & BNX2X_FP_USER_PEND;
+ return READ_ONCE(fp->busy_poll_state) & BNX2X_STATE_FP_POLL;
}

/* false if fp is currently owned */
static inline bool bnx2x_fp_ll_disable(struct bnx2x_fastpath *fp)
{
- int rc = true;
-
- spin_lock_bh(&fp->lock);
- if (fp->state & BNX2X_FP_OWNED)
- rc = false;
- fp->state |= BNX2X_FP_STATE_DISABLED;
- spin_unlock_bh(&fp->lock);
+ set_bit(BNX2X_STATE_FP_DISABLE_BIT, &fp->busy_poll_state);
+ return !bnx2x_fp_ll_polling(fp);

- return rc;
}
#else
-static inline void bnx2x_fp_init_lock(struct bnx2x_fastpath *fp)
+static inline void bnx2x_fp_busy_poll_init(struct bnx2x_fastpath *fp)
{
}

@@ -725,9 +692,8 @@ static inline bool bnx2x_fp_lock_napi(st
return true;
}

-static inline bool bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp)
+static inline void bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp)
{
- return false;
}

static inline bool bnx2x_fp_lock_poll(struct bnx2x_fastpath *fp)
@@ -735,9 +701,8 @@ static inline bool bnx2x_fp_lock_poll(st
return false;
}

-static inline bool bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp)
+static inline void bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp)
{
- return false;
}

static inline bool bnx2x_fp_ll_polling(struct bnx2x_fastpath *fp)
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -1849,7 +1849,7 @@ static void bnx2x_napi_enable_cnic(struc
int i;

for_each_rx_queue_cnic(bp, i) {
- bnx2x_fp_init_lock(&bp->fp[i]);
+ bnx2x_fp_busy_poll_init(&bp->fp[i]);
napi_enable(&bnx2x_fp(bp, i, napi));
}
}
@@ -1859,7 +1859,7 @@ static void bnx2x_napi_enable(struct bnx
int i;

for_each_eth_queue(bp, i) {
- bnx2x_fp_init_lock(&bp->fp[i]);
+ bnx2x_fp_busy_poll_init(&bp->fp[i]);
napi_enable(&bnx2x_fp(bp, i, napi));
}
}
@@ -3191,9 +3191,10 @@ static int bnx2x_poll(struct napi_struct
}
}

+ bnx2x_fp_unlock_napi(fp);
+
/* Fall out from the NAPI loop if needed */
- if (!bnx2x_fp_unlock_napi(fp) &&
- !(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {
+ if (!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {

/* No need to update SB for FCoE L2 ring as long as
* it's connected to the default SB and the SB


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/