RE: [PATCH V3 net-next] net: fec: add XDP_TX feature support
From: Wei Fang
Date: Wed Aug 02 2023 - 23:58:33 EST
> > } else {
> > - xdp_return_frame(xdpf);
> > + xdp_return_frame_rx_napi(xdpf);
>
> If you implement Jesper's syncing suggestions, I think you can use
>
> page_pool_put_page(pool, page, 0, true);
>
> for XDP_TX here to avoid the DMA sync on page recycle.
I tried Jesper's syncing suggestion and used page_pool_put_page() to recycle
pages, but the results do not seem to show any improvement in XDP_TX
performance; it even degrades the speed.
The results with the current modification are shown below.
root@imx8mpevk:~# ./xdp2 eth0
proto 17: 260180 pkt/s
proto 17: 260373 pkt/s
proto 17: 260363 pkt/s
proto 17: 259036 pkt/s
proto 17: 260180 pkt/s
proto 17: 260048 pkt/s
proto 17: 260029 pkt/s
proto 17: 260133 pkt/s
proto 17: 260021 pkt/s
proto 17: 260203 pkt/s
proto 17: 260293 pkt/s
proto 17: 259418 pkt/s
After using the sync suggestion, the result shows as follow.
root@imx8mpevk:~# ./xdp2 eth0
proto 17: 255956 pkt/s
proto 17: 255841 pkt/s
proto 17: 255835 pkt/s
proto 17: 255381 pkt/s
proto 17: 255736 pkt/s
proto 17: 255779 pkt/s
proto 17: 254135 pkt/s
proto 17: 255584 pkt/s
proto 17: 255855 pkt/s
proto 17: 255664 pkt/s
Below are my changes; I don't know what caused the degradation. Based on the
results, it's better to keep the current modification.
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index d5fda24a4c52..415c0cb83f84 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -77,7 +77,8 @@
static void set_multicast_list(struct net_device *ndev);
static void fec_enet_itr_coal_set(struct net_device *ndev);
static int fec_enet_xdp_tx_xmit(struct net_device *ndev,
- struct xdp_buff *xdp);
+ struct xdp_buff *xdp,
+ u32 dma_sync_len);
#define DRIVER_NAME "fec"
@@ -1487,7 +1488,14 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id, int budget)
/* Free the sk buffer associated with this last transmit */
dev_kfree_skb_any(skb);
} else {
- xdp_return_frame_rx_napi(xdpf);
+ if (txq->tx_buf[index].type == FEC_TXBUF_T_XDP_NDO)
+ xdp_return_frame_rx_napi(xdpf);
+ else {
+ struct page *page;
+
+ page = virt_to_head_page(xdpf->data);
+ page_pool_put_page(page->pp, page, 0, true);
+ }
txq->tx_buf[index].xdp = NULL;
/* restore default tx buffer type: FEC_TXBUF_T_SKB */
@@ -1557,7 +1565,8 @@ fec_enet_run_xdp(struct fec_enet_private *fep, struct bpf_prog *prog,
act = bpf_prog_run_xdp(prog, xdp);
/* Due xdp_adjust_tail: DMA sync for_device cover max len CPU touch */
- sync = xdp->data_end - xdp->data_hard_start - FEC_ENET_XDP_HEADROOM;
+ sync = xdp->data_end - xdp->data;
sync = max(sync, len);
switch (act) {
@@ -1579,7 +1588,7 @@ fec_enet_run_xdp(struct fec_enet_private *fep, struct bpf_prog *prog,
break;
case XDP_TX:
- err = fec_enet_xdp_tx_xmit(fep->netdev, xdp);
+ err = fec_enet_xdp_tx_xmit(fep->netdev, xdp, sync);
if (unlikely(err)) {
ret = FEC_ENET_XDP_CONSUMED;
page = virt_to_head_page(xdp->data);
@@ -3807,6 +3816,7 @@ fec_enet_xdp_get_tx_queue(struct fec_enet_private *fep, int index)
static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
struct fec_enet_priv_tx_q *txq,
struct xdp_frame *frame,
+ u32 dma_sync_len,
bool ndo_xmit)
{
unsigned int index, status, estatus;
@@ -3840,7 +3850,7 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
dma_addr = page_pool_get_dma_addr(page) + sizeof(*frame) +
frame->headroom;
dma_sync_single_for_device(&fep->pdev->dev, dma_addr,
- frame->len, DMA_BIDIRECTIONAL);
+ dma_sync_len, DMA_BIDIRECTIONAL);
txq->tx_buf[index].type = FEC_TXBUF_T_XDP_TX;
}
@@ -3889,7 +3899,8 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
}
static int fec_enet_xdp_tx_xmit(struct net_device *ndev,
- struct xdp_buff *xdp)
+ struct xdp_buff *xdp,
+ u32 dma_sync_len)
{
struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
struct fec_enet_private *fep = netdev_priv(ndev);
@@ -3909,7 +3920,7 @@ static int fec_enet_xdp_tx_xmit(struct net_device *ndev,
/* Avoid tx timeout as XDP shares the queue with kernel stack */
txq_trans_cond_update(nq);
- ret = fec_enet_txq_xmit_frame(fep, txq, xdpf, false);
+ ret = fec_enet_txq_xmit_frame(fep, txq, xdpf, dma_sync_len, false);
__netif_tx_unlock(nq);
@@ -3938,7 +3949,7 @@ static int fec_enet_xdp_xmit(struct net_device *dev,
/* Avoid tx timeout as XDP shares the queue with kernel stack */
txq_trans_cond_update(nq);
for (i = 0; i < num_frames; i++) {
- if (fec_enet_txq_xmit_frame(fep, txq, frames[i], true) < 0)
+ if (fec_enet_txq_xmit_frame(fep, txq, frames[i], 0, true) < 0)
break;
sent_frames++;
}