Re: [PATCH net-next 01/16] libeth: convert to netmem
From: Mina Almasry
Date: Wed Mar 05 2025 - 19:13:59 EST
On Wed, Mar 5, 2025 at 8:23 AM Alexander Lobakin
<aleksander.lobakin@xxxxxxxxx> wrote:
>
> Back when the libeth Rx core was initially written, devmem was a draft
> and netmem_ref didn't exist in the mainline. Now that it's here, make
> libeth MP-agnostic before introducing any new code or any new library
> users.
> When it's known that the created PP/FQ is for header buffers, use faster
> "unsafe" underscored netmem <--> virt accessors as netmem_is_net_iov()
> is always false in that case, but consumes some cycles (bit test +
> true branch).
> Misc: replace explicit EXPORT_SYMBOL_NS_GPL("NS") with
> DEFAULT_SYMBOL_NAMESPACE.
>
> Signed-off-by: Alexander Lobakin <aleksander.lobakin@xxxxxxxxx>
> ---
> include/net/libeth/rx.h | 22 +++++++------
> drivers/net/ethernet/intel/iavf/iavf_txrx.c | 14 ++++----
> .../ethernet/intel/idpf/idpf_singleq_txrx.c | 2 +-
> drivers/net/ethernet/intel/idpf/idpf_txrx.c | 33 +++++++++++--------
> drivers/net/ethernet/intel/libeth/rx.c | 20 ++++++-----
> 5 files changed, 51 insertions(+), 40 deletions(-)
>
> diff --git a/include/net/libeth/rx.h b/include/net/libeth/rx.h
> index ab05024be518..7d5dc58984b1 100644
> --- a/include/net/libeth/rx.h
> +++ b/include/net/libeth/rx.h
> @@ -1,5 +1,5 @@
> /* SPDX-License-Identifier: GPL-2.0-only */
> -/* Copyright (C) 2024 Intel Corporation */
> +/* Copyright (C) 2024-2025 Intel Corporation */
>
> #ifndef __LIBETH_RX_H
> #define __LIBETH_RX_H
> @@ -31,7 +31,7 @@
>
> /**
> * struct libeth_fqe - structure representing an Rx buffer (fill queue element)
> - * @page: page holding the buffer
> + * @netmem: network memory reference holding the buffer
> * @offset: offset from the page start (to the headroom)
> * @truesize: total space occupied by the buffer (w/ headroom and tailroom)
> *
> @@ -40,7 +40,7 @@
> * former, @offset is always 0 and @truesize is always ```PAGE_SIZE```.
> */
> struct libeth_fqe {
> - struct page *page;
> + netmem_ref netmem;
> u32 offset;
> u32 truesize;
> } __aligned_largest;
> @@ -102,15 +102,16 @@ static inline dma_addr_t libeth_rx_alloc(const struct libeth_fq_fp *fq, u32 i)
> struct libeth_fqe *buf = &fq->fqes[i];
>
> buf->truesize = fq->truesize;
> - buf->page = page_pool_dev_alloc(fq->pp, &buf->offset, &buf->truesize);
> - if (unlikely(!buf->page))
> + buf->netmem = page_pool_dev_alloc_netmem(fq->pp, &buf->offset,
> + &buf->truesize);
> + if (unlikely(!buf->netmem))
> return DMA_MAPPING_ERROR;
>
> - return page_pool_get_dma_addr(buf->page) + buf->offset +
> + return page_pool_get_dma_addr_netmem(buf->netmem) + buf->offset +
> fq->pp->p.offset;
> }
>
> -void libeth_rx_recycle_slow(struct page *page);
> +void libeth_rx_recycle_slow(netmem_ref netmem);
>
> /**
> * libeth_rx_sync_for_cpu - synchronize or recycle buffer post DMA
> @@ -126,18 +127,19 @@ void libeth_rx_recycle_slow(struct page *page);
> static inline bool libeth_rx_sync_for_cpu(const struct libeth_fqe *fqe,
> u32 len)
> {
> - struct page *page = fqe->page;
> + netmem_ref netmem = fqe->netmem;
>
> /* Very rare, but possible case. The most common reason:
> * the last fragment contained FCS only, which was then
> * stripped by the HW.
> */
> if (unlikely(!len)) {
> - libeth_rx_recycle_slow(page);
> + libeth_rx_recycle_slow(netmem);
I think before this patch this would have expanded to:
page_pool_put_full_page(pool, page, true);
But now I think it expands to:
page_pool_put_full_netmem(netmem_get_pp(netmem), netmem, false);
Is the switch of the last argument (allow_direct) from true to false
intentional? Or is this a slow path, so it doesn't matter?
> return false;
> }
>
> - page_pool_dma_sync_for_cpu(page->pp, page, fqe->offset, len);
> + page_pool_dma_sync_netmem_for_cpu(netmem_get_pp(netmem), netmem,
> + fqe->offset, len);
>
> return true;
> }
> diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
> index 422312b8b54a..35d353d38129 100644
> --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c
> +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
> @@ -723,7 +723,7 @@ static void iavf_clean_rx_ring(struct iavf_ring *rx_ring)
> for (u32 i = rx_ring->next_to_clean; i != rx_ring->next_to_use; ) {
> const struct libeth_fqe *rx_fqes = &rx_ring->rx_fqes[i];
>
> - page_pool_put_full_page(rx_ring->pp, rx_fqes->page, false);
> + libeth_rx_recycle_slow(rx_fqes->netmem);
>
> if (unlikely(++i == rx_ring->count))
> i = 0;
> @@ -1197,10 +1197,11 @@ static void iavf_add_rx_frag(struct sk_buff *skb,
> const struct libeth_fqe *rx_buffer,
> unsigned int size)
> {
> - u32 hr = rx_buffer->page->pp->p.offset;
> + u32 hr = netmem_get_pp(rx_buffer->netmem)->p.offset;
>
> - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
> - rx_buffer->offset + hr, size, rx_buffer->truesize);
> + skb_add_rx_frag_netmem(skb, skb_shinfo(skb)->nr_frags,
> + rx_buffer->netmem, rx_buffer->offset + hr,
> + size, rx_buffer->truesize);
> }
>
> /**
> @@ -1214,12 +1215,13 @@ static void iavf_add_rx_frag(struct sk_buff *skb,
> static struct sk_buff *iavf_build_skb(const struct libeth_fqe *rx_buffer,
> unsigned int size)
> {
> - u32 hr = rx_buffer->page->pp->p.offset;
> + struct page *buf_page = __netmem_to_page(rx_buffer->netmem);
> + u32 hr = buf_page->pp->p.offset;
> struct sk_buff *skb;
> void *va;
>
> /* prefetch first cache line of first page */
> - va = page_address(rx_buffer->page) + rx_buffer->offset;
> + va = page_address(buf_page) + rx_buffer->offset;
> net_prefetch(va + hr);
>
> /* build an skb around the page buffer */
> diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c
> index eae1b6f474e6..aeb2ca5f5a0a 100644
> --- a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c
> +++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c
> @@ -1009,7 +1009,7 @@ static int idpf_rx_singleq_clean(struct idpf_rx_queue *rx_q, int budget)
> break;
>
> skip_data:
> - rx_buf->page = NULL;
> + rx_buf->netmem = 0;
>
> IDPF_SINGLEQ_BUMP_RING_IDX(rx_q, ntc);
> cleaned_count++;
> diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
> index bdf52cef3891..6254806c2072 100644
> --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
> +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
> @@ -382,12 +382,12 @@ static int idpf_tx_desc_alloc_all(struct idpf_vport *vport)
> */
> static void idpf_rx_page_rel(struct libeth_fqe *rx_buf)
> {
> - if (unlikely(!rx_buf->page))
> + if (unlikely(!rx_buf->netmem))
> return;
>
> - page_pool_put_full_page(rx_buf->page->pp, rx_buf->page, false);
> + libeth_rx_recycle_slow(rx_buf->netmem);
>
> - rx_buf->page = NULL;
> + rx_buf->netmem = 0;
> rx_buf->offset = 0;
> }
>
> @@ -3096,10 +3096,10 @@ idpf_rx_process_skb_fields(struct idpf_rx_queue *rxq, struct sk_buff *skb,
> void idpf_rx_add_frag(struct idpf_rx_buf *rx_buf, struct sk_buff *skb,
> unsigned int size)
> {
> - u32 hr = rx_buf->page->pp->p.offset;
> + u32 hr = netmem_get_pp(rx_buf->netmem)->p.offset;
>
> - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buf->page,
> - rx_buf->offset + hr, size, rx_buf->truesize);
> + skb_add_rx_frag_netmem(skb, skb_shinfo(skb)->nr_frags, rx_buf->netmem,
> + rx_buf->offset + hr, size, rx_buf->truesize);
> }
>
> /**
> @@ -3122,16 +3122,20 @@ static u32 idpf_rx_hsplit_wa(const struct libeth_fqe *hdr,
> struct libeth_fqe *buf, u32 data_len)
> {
> u32 copy = data_len <= L1_CACHE_BYTES ? data_len : ETH_HLEN;
> + struct page *hdr_page, *buf_page;
> const void *src;
> void *dst;
>
> - if (!libeth_rx_sync_for_cpu(buf, copy))
> + if (unlikely(netmem_is_net_iov(buf->netmem)) ||
> + !libeth_rx_sync_for_cpu(buf, copy))
> return 0;
>
I could not immediately understand why you need a netmem_is_net_iov()
check here. libeth_rx_sync_for_cpu() will delegate to
page_pool_dma_sync_netmem_for_cpu(), which should do the right thing
regardless of whether the netmem is a page or a net_iov, right? Is this
just to save some cycles?
--
Thanks,
Mina