[PATCH] net: alx: use custom skb allocator

From: Feng Tang
Date: Fri May 20 2016 - 01:46:32 EST


This patch follows Eric Dumazet's commit 7b70176421 to fix exactly
the same bug in the alx driver: the network link is lost
1-5 minutes after the device is brought up.

The following is the commit log from Eric's 7b70176421:

"We had reports ( https://bugzilla.kernel.org/show_bug.cgi?id=54021 )
that using high order pages for skb allocations is problematic for atl1c

We do not know exactly what the problem is, but we suspect that crossing
4K pages is not well supported by this hardware.

Use a custom allocator, using page allocator and 2K fragments for
optimal stack behavior. We might make this allocator generic
in future kernels."

My debugging points to the same suspicion: most of the errors happen
when an RX buffer address has the form 0x......f80. Hopefully
this will get noticed and fixed on the silicon side.

My victim is a Lenovo Y580 laptop with an Atheros ALX AR8161 ethernet
device (PCI ID 1969:1091). With this patch the ethernet device
works just fine.

Signed-off-by: Feng Tang <feng.tang@xxxxxxxxx>
---
drivers/net/ethernet/atheros/alx/alx.h | 4 +++
drivers/net/ethernet/atheros/alx/main.c | 48 ++++++++++++++++++++++++++++++++-
2 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/atheros/alx/alx.h b/drivers/net/ethernet/atheros/alx/alx.h
index 8fc93c5..d02c424 100644
--- a/drivers/net/ethernet/atheros/alx/alx.h
+++ b/drivers/net/ethernet/atheros/alx/alx.h
@@ -96,6 +96,10 @@ struct alx_priv {
unsigned int rx_ringsz;
unsigned int rxbuf_size;

+ struct page *rx_page;
+ unsigned int rx_page_offset;
+ unsigned int rx_frag_size;
+
struct napi_struct napi;
struct alx_tx_queue txq;
struct alx_rx_queue rxq;
diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index 9fe8b5e..c98acdc 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -70,6 +70,35 @@ static void alx_free_txbuf(struct alx_priv *alx, int entry)
}
}

+static struct sk_buff *alx_alloc_skb(struct alx_priv *alx, gfp_t gfp) /* build one RX skb, carving rx_frag_size chunks out of a single cached page */
+{
+ struct sk_buff *skb;
+ struct page *page;
+
+ if (alx->rx_frag_size > PAGE_SIZE) /* fragment does not fit in one page: use the regular allocator */
+ return __netdev_alloc_skb(alx->dev, alx->rxbuf_size, gfp);
+
+ page = alx->rx_page; /* page left partially used by a previous call, if any */
+ if (!page) {
+ alx->rx_page = page = alloc_page(gfp);
+ if (unlikely(!page))
+ return NULL; /* page allocation failed; caller sees NULL */
+ alx->rx_page_offset = 0; /* fresh page: start carving from its beginning */
+ }
+
+ skb = build_skb(page_address(page) + alx->rx_page_offset,
+ alx->rx_frag_size);
+ if (likely(skb)) {
+ alx->rx_page_offset += alx->rx_frag_size; /* advance past the fragment just handed out */
+ if (alx->rx_page_offset >= PAGE_SIZE)
+ alx->rx_page = NULL; /* page fully consumed: next call allocates a new one */
+ else
+ get_page(page); /* page stays cached for the next fragment, so take another reference */
+ }
+ return skb; /* NULL if build_skb() failed; offset and cached page are then left unchanged */
+}
+
+
static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t gfp)
{
struct alx_rx_queue *rxq = &alx->rxq;
@@ -86,7 +115,7 @@ static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t gfp)
while (!cur_buf->skb && next != rxq->read_idx) {
struct alx_rfd *rfd = &rxq->rfd[cur];

- skb = __netdev_alloc_skb(alx->dev, alx->rxbuf_size, gfp);
+ skb = alx_alloc_skb(alx, gfp);
if (!skb)
break;
dma = dma_map_single(&alx->hw.pdev->dev,
@@ -124,6 +153,7 @@ static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t gfp)
alx_write_mem16(&alx->hw, ALX_RFD_PIDX, cur);
}

+
return count;
}

@@ -592,6 +622,11 @@ static void alx_free_rings(struct alx_priv *alx)
kfree(alx->txq.bufs);
kfree(alx->rxq.bufs);

+ if (alx->rx_page) {
+ put_page(alx->rx_page);
+ alx->rx_page = NULL;
+ }
+
dma_free_coherent(&alx->hw.pdev->dev,
alx->descmem.size,
alx->descmem.virt,
@@ -646,6 +681,7 @@ static int alx_request_irq(struct alx_priv *alx)
alx->dev->name, alx);
if (!err)
goto out;
+
/* fall back to legacy interrupt */
pci_disable_msi(alx->hw.pdev);
}
@@ -689,6 +725,7 @@ static int alx_init_sw(struct alx_priv *alx)
struct pci_dev *pdev = alx->hw.pdev;
struct alx_hw *hw = &alx->hw;
int err;
+ unsigned int head_size;

err = alx_identify_hw(alx);
if (err) {
@@ -704,7 +741,12 @@ static int alx_init_sw(struct alx_priv *alx)

hw->smb_timer = 400;
hw->mtu = alx->dev->mtu;
+
alx->rxbuf_size = ALX_MAX_FRAME_LEN(hw->mtu);
+ head_size = SKB_DATA_ALIGN(alx->rxbuf_size + NET_SKB_PAD) +
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ alx->rx_frag_size = roundup_pow_of_two(head_size);
+
alx->tx_ringsz = 256;
alx->rx_ringsz = 512;
hw->imt = 200;
@@ -806,6 +848,7 @@ static int alx_change_mtu(struct net_device *netdev, int mtu)
{
struct alx_priv *alx = netdev_priv(netdev);
int max_frame = ALX_MAX_FRAME_LEN(mtu);
+ unsigned int head_size;

if ((max_frame < ALX_MIN_FRAME_SIZE) ||
(max_frame > ALX_MAX_FRAME_SIZE))
@@ -817,6 +860,9 @@ static int alx_change_mtu(struct net_device *netdev, int mtu)
netdev->mtu = mtu;
alx->hw.mtu = mtu;
alx->rxbuf_size = max(max_frame, ALX_DEF_RXBUF_SIZE);
+ head_size = SKB_DATA_ALIGN(alx->rxbuf_size + NET_SKB_PAD) +
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ alx->rx_frag_size = roundup_pow_of_two(head_size);
netdev_update_features(netdev);
if (netif_running(netdev))
alx_reinit(alx);
--
2.5.0