[PATCH net-next 6/6] net: mvneta: Improve the buffer allocation method for SWBM
From: Gregory CLEMENT
Date: Fri Jul 06 2018 - 09:20:13 EST
From: Yelena Krivosheev <yelena@xxxxxxxxxxx>
With system having a small memory (around 256MB), the state "cannot
allocate memory to refill with new buffer" is reach pretty quickly.
By this patch we changed buffer allocation method to a better handling of
this use case by avoiding memory allocation issues.
Signed-off-by: Yelena Krivosheev <yelena@xxxxxxxxxxx>
[gregory: extract from a larger patch]
Signed-off-by: Gregory CLEMENT <gregory.clement@xxxxxxxxxxx>
---
drivers/net/ethernet/marvell/mvneta.c | 281 +++++++++++++++++---------
1 file changed, 183 insertions(+), 98 deletions(-)
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 8fc4be238083..806e78700e50 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -479,7 +479,10 @@ struct mvneta_port {
#define MVNETA_RXD_ERR_RESOURCE (BIT(17) | BIT(18))
#define MVNETA_RXD_ERR_CODE_MASK (BIT(17) | BIT(18))
#define MVNETA_RXD_L3_IP4 BIT(25)
-#define MVNETA_RXD_FIRST_LAST_DESC (BIT(26) | BIT(27))
+#define MVNETA_RXD_LAST_DESC BIT(26)
+#define MVNETA_RXD_FIRST_DESC BIT(27)
+#define MVNETA_RXD_FIRST_LAST_DESC (MVNETA_RXD_FIRST_DESC | \
+ MVNETA_RXD_LAST_DESC)
#define MVNETA_RXD_L4_CSUM_OK BIT(30)
#if defined(__LITTLE_ENDIAN)
@@ -607,6 +610,14 @@ struct mvneta_rx_queue {
/* Index of the next RX DMA descriptor to process */
int next_desc_to_proc;
+ /* Index of first RX DMA descriptor to refill */
+ int first_to_refill;
+ u32 refill_num;
+
+ /* pointer to uncomplete skb buffer */
+ struct sk_buff *skb;
+ int left_size;
+
/* error counters */
u32 skb_alloc_err;
u32 refill_err;
@@ -622,6 +633,7 @@ static int txq_number = 8;
static int rxq_def;
static int rx_copybreak __read_mostly = 256;
+static int rx_header_size __read_mostly = 128;
/* HW BM need that each port be identify by a unique ID */
static int global_port_id;
@@ -1685,13 +1697,6 @@ static void mvneta_rx_error(struct mvneta_port *pp,
{
u32 status = rx_desc->status;
- if (!mvneta_rxq_desc_is_first_last(status)) {
- netdev_err(pp->dev,
- "bad rx status %08x (buffer oversize), size=%d\n",
- status, rx_desc->data_size);
- return;
- }
-
switch (status & MVNETA_RXD_ERR_CODE_MASK) {
case MVNETA_RXD_ERR_CRC:
netdev_err(pp->dev, "bad rx status %08x (crc error), size=%d\n",
@@ -1879,6 +1884,8 @@ static void mvneta_rxq_drop_pkts(struct mvneta_port *pp,
for (i = 0; i < rxq->size; i++) {
struct mvneta_rx_desc *rx_desc = rxq->descs + i;
void *data = rxq->buf_virt_addr[i];
+ if (!data || !(rx_desc->buf_phys_addr))
+ continue;
dma_unmap_single(pp->dev->dev.parent, rx_desc->buf_phys_addr,
MVNETA_RX_BUF_SIZE(pp->pkt_size), DMA_FROM_DEVICE);
@@ -1886,117 +1893,183 @@ static void mvneta_rxq_drop_pkts(struct mvneta_port *pp,
}
}
+static inline
+int mvneta_rx_refill_queue(struct mvneta_port *pp, struct mvneta_rx_queue *rxq)
+{
+ struct mvneta_rx_desc *rx_desc;
+ int curr_desc = rxq->first_to_refill;
+ int i;
+
+ for (i = 0; (i < rxq->refill_num) && (i < 64); i++) {
+ rx_desc = rxq->descs + curr_desc;
+ if (!(rx_desc->buf_phys_addr)) {
+ if (mvneta_rx_refill(pp, rx_desc, rxq, GFP_ATOMIC)) {
+ pr_err("Can't refill queue %d. Done %d from %d\n",
+ rxq->id, i, rxq->refill_num);
+ rxq->refill_err++;
+ break;
+ }
+ }
+ curr_desc = MVNETA_QUEUE_NEXT_DESC(rxq, curr_desc);
+ }
+ rxq->refill_num -= i;
+ rxq->first_to_refill = curr_desc;
+
+ return i;
+}
+
/* Main rx processing when using software buffer management */
-static int mvneta_rx_swbm(struct mvneta_port *pp, int rx_todo,
+static int mvneta_rx_swbm(struct mvneta_port *pp, int budget,
struct mvneta_rx_queue *rxq)
{
struct mvneta_pcpu_port *port = this_cpu_ptr(pp->ports);
struct net_device *dev = pp->dev;
- int rx_done;
+ int rx_todo, rx_proc;
+ int refill = 0;
u32 rcvd_pkts = 0;
u32 rcvd_bytes = 0;
/* Get number of received packets */
- rx_done = mvneta_rxq_busy_desc_num_get(pp, rxq);
-
- if (rx_todo > rx_done)
- rx_todo = rx_done;
-
- rx_done = 0;
+ rx_todo = mvneta_rxq_busy_desc_num_get(pp, rxq);
+ rx_proc = 0;
/* Fairness NAPI loop */
- while (rx_done < rx_todo) {
+ while ((rcvd_pkts < budget) && (rx_proc < rx_todo)) {
struct mvneta_rx_desc *rx_desc = mvneta_rxq_next_desc_get(rxq);
- struct sk_buff *skb;
unsigned char *data;
struct page *page;
dma_addr_t phys_addr;
- u32 rx_status, frag_size;
- int rx_bytes, err, index;
+ u32 rx_status, index;
+ int rx_bytes, skb_size, copy_size;
+ int frag_num, frag_size, frag_offset;
- rx_done++;
- rx_status = rx_desc->status;
- rx_bytes = rx_desc->data_size - (ETH_FCS_LEN + MVNETA_MH_SIZE);
index = rx_desc - rxq->descs;
page = (struct page *)rxq->buf_virt_addr[index];
data = page_address(page);
/* Prefetch header */
prefetch(data);
- phys_addr = rx_desc->buf_phys_addr - pp->rx_offset_correction;
- if (!mvneta_rxq_desc_is_first_last(rx_status) ||
- (rx_status & MVNETA_RXD_ERR_SUMMARY)) {
- mvneta_rx_error(pp, rx_desc);
-err_drop_frame:
- dev->stats.rx_errors++;
- /* leave the descriptor untouched */
- continue;
- }
+ phys_addr = rx_desc->buf_phys_addr;
+ rx_status = rx_desc->status;
+ rx_proc++;
+ rxq->refill_num++;
+
+ if (rx_status & MVNETA_RXD_FIRST_DESC) {
+ /* Check errors only for FIRST descriptor */
+ if (rx_status & MVNETA_RXD_ERR_SUMMARY) {
+ mvneta_rx_error(pp, rx_desc);
+ dev->stats.rx_errors++;
+ /* leave the descriptor untouched */
+ continue;
+ }
+ rx_bytes = rx_desc->data_size -
+ (ETH_FCS_LEN + MVNETA_MH_SIZE);
- if (rx_bytes <= rx_copybreak) {
- /* better copy a small frame and not unmap the DMA region */
- skb = netdev_alloc_skb_ip_align(dev, rx_bytes);
- if (unlikely(!skb)) {
+ /* Allocate small skb for each new packet */
+ skb_size = max(rx_copybreak, rx_header_size);
+ rxq->skb = netdev_alloc_skb_ip_align(dev, skb_size);
+ if (unlikely(!rxq->skb)) {
netdev_err(dev,
"Can't allocate skb on queue %d\n",
rxq->id);
+ dev->stats.rx_dropped++;
rxq->skb_alloc_err++;
- goto err_drop_frame;
+ continue;
}
+ copy_size = min(skb_size, rx_bytes);
+
+ /* Copy data from buffer to SKB, skip Marvell header */
+ memcpy(rxq->skb->data, data + MVNETA_MH_SIZE,
+ copy_size);
+ skb_put(rxq->skb, copy_size);
+ rxq->left_size = rx_bytes - copy_size;
+
+ mvneta_rx_csum(pp, rx_status, rxq->skb);
+ if (rxq->left_size == 0) {
+ int size = copy_size + MVNETA_MH_SIZE;
+
+ dma_sync_single_range_for_cpu(dev->dev.parent,
+ phys_addr, 0,
+ size,
+ DMA_FROM_DEVICE);
+
+ /* leave the descriptor and buffer untouched */
+ } else {
+ /* refill descriptor with new buffer later */
+ rx_desc->buf_phys_addr = 0;
+
+ frag_num = 0;
+ frag_offset = copy_size + MVNETA_MH_SIZE;
+ frag_size = min(rxq->left_size,
+ (int)(PAGE_SIZE - frag_offset));
+ skb_add_rx_frag(rxq->skb, frag_num, page,
+ frag_offset, frag_size,
+ PAGE_SIZE);
+ dma_unmap_single(dev->dev.parent, phys_addr,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ rxq->left_size -= frag_size;
+ }
+ } else {
+ /* Middle or Last descriptor */
+ if (unlikely(!rxq->skb)) {
+ pr_debug("no skb for rx_status 0x%x\n",
+ rx_status);
+ continue;
+ }
+ if (!rxq->left_size) {
+ /* last descriptor has only FCS */
+ /* and can be discarded */
+ dma_sync_single_range_for_cpu(dev->dev.parent,
+ phys_addr, 0,
+ ETH_FCS_LEN,
+ DMA_FROM_DEVICE);
+ /* leave the descriptor and buffer untouched */
+ } else {
+ /* refill descriptor with new buffer later */
+ rx_desc->buf_phys_addr = 0;
+
+ frag_num = skb_shinfo(rxq->skb)->nr_frags;
+ frag_offset = 0;
+ frag_size = min(rxq->left_size,
+ (int)(PAGE_SIZE - frag_offset));
+ skb_add_rx_frag(rxq->skb, frag_num, page,
+ frag_offset, frag_size,
+ PAGE_SIZE);
+
+ dma_unmap_single(dev->dev.parent, phys_addr,
+ PAGE_SIZE,
+ DMA_FROM_DEVICE);
+
+ rxq->left_size -= frag_size;
+ }
+ } /* Middle or Last descriptor */
- dma_sync_single_range_for_cpu(dev->dev.parent,
- phys_addr,
- MVNETA_MH_SIZE + NET_SKB_PAD,
- rx_bytes,
- DMA_FROM_DEVICE);
- skb_put_data(skb, data + MVNETA_MH_SIZE + NET_SKB_PAD,
- rx_bytes);
-
- skb->protocol = eth_type_trans(skb, dev);
- mvneta_rx_csum(pp, rx_status, skb);
- napi_gro_receive(&port->napi, skb);
-
- rcvd_pkts++;
- rcvd_bytes += rx_bytes;
-
- /* leave the descriptor and buffer untouched */
+ if (!(rx_status & MVNETA_RXD_LAST_DESC))
+ /* no last descriptor this time */
continue;
- }
- /* Refill processing */
- err = mvneta_rx_refill(pp, rx_desc, rxq, GFP_KERNEL);
- if (err) {
- netdev_err(dev, "Linux processing - Can't refill\n");
- rxq->refill_err++;
- goto err_drop_frame;
+ if (rxq->left_size) {
+ pr_err("get last desc, but left_size (%d) != 0\n",
+ rxq->left_size);
+ dev_kfree_skb_any(rxq->skb);
+ rxq->left_size = 0;
+ rxq->skb = NULL;
+ continue;
}
-
- frag_size = pp->frag_size;
-
- skb = build_skb(data, frag_size > PAGE_SIZE ? 0 : frag_size);
-
- /* After refill old buffer has to be unmapped regardless
- * the skb is successfully built or not.
- */
- dma_unmap_single(dev->dev.parent, phys_addr,
- MVNETA_RX_BUF_SIZE(pp->pkt_size),
- DMA_FROM_DEVICE);
-
- if (!skb)
- goto err_drop_frame;
-
rcvd_pkts++;
- rcvd_bytes += rx_bytes;
+ rcvd_bytes += rxq->skb->len;
/* Linux processing */
- skb_reserve(skb, MVNETA_MH_SIZE + NET_SKB_PAD);
- skb_put(skb, rx_bytes);
-
- skb->protocol = eth_type_trans(skb, dev);
+ rxq->skb->protocol = eth_type_trans(rxq->skb, dev);
- mvneta_rx_csum(pp, rx_status, skb);
+ if (dev->features & NETIF_F_GRO)
+ napi_gro_receive(&port->napi, rxq->skb);
+ else
+ netif_receive_skb(rxq->skb);
- napi_gro_receive(&port->napi, skb);
+ /* clean uncomplete skb pointer in queue */
+ rxq->skb = NULL;
+ rxq->left_size = 0;
}
if (rcvd_pkts) {
@@ -2008,10 +2081,13 @@ static int mvneta_rx_swbm(struct mvneta_port *pp, int rx_todo,
u64_stats_update_end(&stats->syncp);
}
+ /* return some buffers to hardware queue, one at a time is too slow */
+ refill = mvneta_rx_refill_queue(pp, rxq);
+
/* Update rxq management counters */
- mvneta_rxq_desc_num_update(pp, rxq, rx_done, rx_done);
+ mvneta_rxq_desc_num_update(pp, rxq, rx_proc, refill);
- return rx_done;
+ return rcvd_pkts;
}
/* Main rx processing when using hardware buffer management */
@@ -2818,21 +2894,23 @@ static void mvneta_rxq_hw_init(struct mvneta_port *pp,
mvreg_write(pp, MVNETA_RXQ_BASE_ADDR_REG(rxq->id), rxq->descs_phys);
mvreg_write(pp, MVNETA_RXQ_SIZE_REG(rxq->id), rxq->size);
- /* Set Offset */
- mvneta_rxq_offset_set(pp, rxq, NET_SKB_PAD - pp->rx_offset_correction);
-
/* Set coalescing pkts and time */
mvneta_rx_pkts_coal_set(pp, rxq, rxq->pkts_coal);
mvneta_rx_time_coal_set(pp, rxq, rxq->time_coal);
if (!pp->bm_priv) {
- /* Fill RXQ with buffers from RX pool */
- mvneta_rxq_buf_size_set(pp, rxq,
- MVNETA_RX_BUF_SIZE(pp->pkt_size));
+ /* Set Offset */
+ mvneta_rxq_offset_set(pp, rxq, 0);
+ mvneta_rxq_buf_size_set(pp, rxq, pp->frag_size);
mvneta_rxq_bm_disable(pp, rxq);
mvneta_rxq_fill(pp, rxq, rxq->size);
} else {
+ /* Set Offset */
+ mvneta_rxq_offset_set(pp, rxq,
+ NET_SKB_PAD - pp->rx_offset_correction);
+
mvneta_rxq_bm_enable(pp, rxq);
+ /* Fill RXQ with buffers from RX pool */
mvneta_rxq_long_pool_set(pp, rxq);
mvneta_rxq_short_pool_set(pp, rxq);
mvneta_rxq_non_occup_desc_add(pp, rxq, rxq->size);
@@ -2861,6 +2939,9 @@ static void mvneta_rxq_deinit(struct mvneta_port *pp,
{
mvneta_rxq_drop_pkts(pp, rxq);
+ if (rxq->skb)
+ dev_kfree_skb_any(rxq->skb);
+
if (rxq->descs)
dma_free_coherent(pp->dev->dev.parent,
rxq->size * MVNETA_DESC_ALIGNED_SIZE,
@@ -2871,6 +2952,10 @@ static void mvneta_rxq_deinit(struct mvneta_port *pp,
rxq->last_desc = 0;
rxq->next_desc_to_proc = 0;
rxq->descs_phys = 0;
+ rxq->first_to_refill = 0;
+ rxq->refill_num = 0;
+ rxq->skb = NULL;
+ rxq->left_size = 0;
}
static int mvneta_txq_sw_init(struct mvneta_port *pp,
@@ -4356,14 +4441,6 @@ static int mvneta_probe(struct platform_device *pdev)
pp->dn = dn;
pp->rxq_def = rxq_def;
-
- /* Set RX packet offset correction for platforms, whose
- * NET_SKB_PAD, exceeds 64B. It should be 64B for 64-bit
- * platforms and 0B for 32-bit ones.
- */
- pp->rx_offset_correction =
- max(0, NET_SKB_PAD - MVNETA_RX_PKT_OFFSET_CORRECTION);
-
pp->indir[0] = rxq_def;
/* Get special SoC configurations */
@@ -4451,6 +4528,7 @@ static int mvneta_probe(struct platform_device *pdev)
SET_NETDEV_DEV(dev, &pdev->dev);
pp->id = global_port_id++;
+ pp->rx_offset_correction = 0; /* not relevant for SW BM */
/* Obtain access to BM resources if enabled and already initialized */
bm_node = of_parse_phandle(dn, "buffer-manager", 0);
@@ -4465,6 +4543,13 @@ static int mvneta_probe(struct platform_device *pdev)
pp->bm_priv = NULL;
}
}
+ /* Set RX packet offset correction for platforms, whose
+ * NET_SKB_PAD, exceeds 64B. It should be 64B for 64-bit
+ * platforms and 0B for 32-bit ones.
+ */
+ pp->rx_offset_correction = max(0,
+ NET_SKB_PAD -
+ MVNETA_RX_PKT_OFFSET_CORRECTION);
}
of_node_put(bm_node);
--
2.17.1