Re: [PATCH net-next v3 3/3] net: add netmem_ref to skb_frag_t

From: Willem de Bruijn
Date: Thu Dec 21 2023 - 12:20:06 EST


Mina Almasry wrote:
> Use netmem_ref instead of page in skb_frag_t. Currently netmem_ref
> is always a struct page underneath, but the abstraction allows efforts
> to add support for skb frags not backed by pages.
>
> There is unfortunately 1 instance where the skb_frag_t is assumed to be
> a bio_vec in kcm. For this case, add a debug assert that the skb frag is
> indeed backed by a page, and do a cast.
>
> Add skb[_frag]_fill_netmem_*() and skb_add_rx_frag_netmem() helpers so
> that the API can be used to create netmem skbs.
>
> Signed-off-by: Mina Almasry <almasrymina@xxxxxxxxxx>
>
> ---
>
> v3;
> - Renamed the fields in skb_frag_t.
>
> v2:
> - Add skb frag filling helpers.
>
> ---
> include/linux/skbuff.h | 92 +++++++++++++++++++++++++++++-------------
> net/core/skbuff.c | 22 +++++++---
> net/kcm/kcmsock.c | 10 ++++-
> 3 files changed, 89 insertions(+), 35 deletions(-)
>
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 7ce38874dbd1..729c95e97be1 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -37,6 +37,7 @@
> #endif
> #include <net/net_debug.h>
> #include <net/dropreason-core.h>
> +#include <net/netmem.h>
>
> /**
> * DOC: skb checksums
> @@ -359,7 +360,11 @@ extern int sysctl_max_skb_frags;
> */
> #define GSO_BY_FRAGS 0xFFFF
>
> -typedef struct bio_vec skb_frag_t;
> +typedef struct skb_frag {
> + netmem_ref netmem;
> + unsigned int len;
> + unsigned int offset;
> +} skb_frag_t;
>
> /**
> * skb_frag_size() - Returns the size of a skb fragment
> @@ -367,7 +372,7 @@ typedef struct bio_vec skb_frag_t;
> */
> static inline unsigned int skb_frag_size(const skb_frag_t *frag)
> {
> - return frag->bv_len;
> + return frag->len;
> }
>
> /**
> @@ -377,7 +382,7 @@ static inline unsigned int skb_frag_size(const skb_frag_t *frag)
> */
> static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size)
> {
> - frag->bv_len = size;
> + frag->len = size;
> }
>
> /**
> @@ -387,7 +392,7 @@ static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size)
> */
> static inline void skb_frag_size_add(skb_frag_t *frag, int delta)
> {
> - frag->bv_len += delta;
> + frag->len += delta;
> }
>
> /**
> @@ -397,7 +402,7 @@ static inline void skb_frag_size_add(skb_frag_t *frag, int delta)
> */
> static inline void skb_frag_size_sub(skb_frag_t *frag, int delta)
> {
> - frag->bv_len -= delta;
> + frag->len -= delta;
> }
>
> /**
> @@ -417,7 +422,7 @@ static inline bool skb_frag_must_loop(struct page *p)
> * skb_frag_foreach_page - loop over pages in a fragment
> *
> * @f: skb frag to operate on
> - * @f_off: offset from start of f->bv_page
> + * @f_off: offset from start of f->netmem
> * @f_len: length from f_off to loop over
> * @p: (temp var) current page
> * @p_off: (temp var) offset from start of current page,
> @@ -2431,22 +2436,37 @@ static inline unsigned int skb_pagelen(const struct sk_buff *skb)
> return skb_headlen(skb) + __skb_pagelen(skb);
> }
>
> +static inline void skb_frag_fill_netmem_desc(skb_frag_t *frag,
> + netmem_ref netmem, int off,
> + int size)
> +{
> + frag->netmem = netmem;
> + frag->offset = off;
> + skb_frag_size_set(frag, size);
> +}
> +
> static inline void skb_frag_fill_page_desc(skb_frag_t *frag,
> struct page *page,
> int off, int size)
> {
> - frag->bv_page = page;
> - frag->bv_offset = off;
> - skb_frag_size_set(frag, size);
> + skb_frag_fill_netmem_desc(frag, page_to_netmem(page), off, size);
> +}
> +
> +static inline void __skb_fill_netmem_desc_noacc(struct skb_shared_info *shinfo,
> + int i, netmem_ref netmem,
> + int off, int size)
> +{
> + skb_frag_t *frag = &shinfo->frags[i];
> +
> + skb_frag_fill_netmem_desc(frag, netmem, off, size);
> }
>
> static inline void __skb_fill_page_desc_noacc(struct skb_shared_info *shinfo,
> int i, struct page *page,
> int off, int size)
> {
> - skb_frag_t *frag = &shinfo->frags[i];
> -
> - skb_frag_fill_page_desc(frag, page, off, size);
> + __skb_fill_netmem_desc_noacc(shinfo, i, page_to_netmem(page), off,
> + size);
> }
>
> /**
> @@ -2462,10 +2482,10 @@ static inline void skb_len_add(struct sk_buff *skb, int delta)
> }
>
> /**
> - * __skb_fill_page_desc - initialise a paged fragment in an skb
> + * __skb_fill_netmem_desc - initialise a fragment in an skb
> * @skb: buffer containing fragment to be initialised
> - * @i: paged fragment index to initialise
> - * @page: the page to use for this fragment
> + * @i: fragment index to initialise
> + * @netmem: the netmem to use for this fragment
> * @off: the offset to the data with @page
> * @size: the length of the data
> *
> @@ -2474,10 +2494,13 @@ static inline void skb_len_add(struct sk_buff *skb, int delta)
> *
> * Does not take any additional reference on the fragment.
> */
> -static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
> - struct page *page, int off, int size)
> +static inline void __skb_fill_netmem_desc(struct sk_buff *skb, int i,
> + netmem_ref netmem, int off,
> + int size)
> {
> - __skb_fill_page_desc_noacc(skb_shinfo(skb), i, page, off, size);
> + struct page *page = netmem_to_page(netmem);
> +
> + __skb_fill_netmem_desc_noacc(skb_shinfo(skb), i, netmem, off, size);
>
> /* Propagate page pfmemalloc to the skb if we can. The problem is
> * that not all callers have unique ownership of the page but rely
> @@ -2485,7 +2508,21 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
> */
> page = compound_head(page);
> if (page_is_pfmemalloc(page))
> - skb->pfmemalloc = true;
> + skb->pfmemalloc = true;
> +}
> +
> +static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
> + struct page *page, int off, int size)
> +{
> + __skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size);
> +}
> +
> +static inline void skb_fill_netmem_desc(struct sk_buff *skb, int i,
> + netmem_ref netmem, int off,
> + int size)
> +{
> + __skb_fill_netmem_desc(skb, i, netmem, off, size);
> + skb_shinfo(skb)->nr_frags = i + 1;
> }
>
> /**
> @@ -2505,8 +2542,7 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
> static inline void skb_fill_page_desc(struct sk_buff *skb, int i,
> struct page *page, int off, int size)
> {
> - __skb_fill_page_desc(skb, i, page, off, size);
> - skb_shinfo(skb)->nr_frags = i + 1;
> + skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size);
> }
>
> /**
> @@ -2532,6 +2568,8 @@ static inline void skb_fill_page_desc_noacc(struct sk_buff *skb, int i,
>
> void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
> int size, unsigned int truesize);
> +void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem,
> + int off, int size, unsigned int truesize);
>
> void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
> unsigned int truesize);
> @@ -3380,7 +3418,7 @@ static inline void skb_propagate_pfmemalloc(const struct page *page,
> */
> static inline unsigned int skb_frag_off(const skb_frag_t *frag)
> {
> - return frag->bv_offset;
> + return frag->offset;
> }
>
> /**
> @@ -3390,7 +3428,7 @@ static inline unsigned int skb_frag_off(const skb_frag_t *frag)
> */
> static inline void skb_frag_off_add(skb_frag_t *frag, int delta)
> {
> - frag->bv_offset += delta;
> + frag->offset += delta;
> }
>
> /**
> @@ -3400,7 +3438,7 @@ static inline void skb_frag_off_add(skb_frag_t *frag, int delta)
> */
> static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset)
> {
> - frag->bv_offset = offset;
> + frag->offset = offset;
> }
>
> /**
> @@ -3411,7 +3449,7 @@ static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset)
> static inline void skb_frag_off_copy(skb_frag_t *fragto,
> const skb_frag_t *fragfrom)
> {
> - fragto->bv_offset = fragfrom->bv_offset;
> + fragto->offset = fragfrom->offset;
> }
>
> /**
> @@ -3422,7 +3460,7 @@ static inline void skb_frag_off_copy(skb_frag_t *fragto,
> */
> static inline struct page *skb_frag_page(const skb_frag_t *frag)
> {
> - return frag->bv_page;
> + return netmem_to_page(frag->netmem);
> }
>
> /**
> @@ -3526,7 +3564,7 @@ static inline void *skb_frag_address_safe(const skb_frag_t *frag)
> static inline void skb_frag_page_copy(skb_frag_t *fragto,
> const skb_frag_t *fragfrom)
> {
> - fragto->bv_page = fragfrom->bv_page;
> + fragto->netmem = fragfrom->netmem;
> }
>
> bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio);
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 4d4b11b0a83d..8b55e927bbe9 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -845,16 +845,24 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
> }
> EXPORT_SYMBOL(__napi_alloc_skb);
>
> -void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
> - int size, unsigned int truesize)
> +void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem,
> + int off, int size, unsigned int truesize)
> {
> DEBUG_NET_WARN_ON_ONCE(size > truesize);
>
> - skb_fill_page_desc(skb, i, page, off, size);
> + skb_fill_netmem_desc(skb, i, netmem, off, size);
> skb->len += size;
> skb->data_len += size;
> skb->truesize += truesize;
> }
> +EXPORT_SYMBOL(skb_add_rx_frag_netmem);
> +
> +void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
> + int size, unsigned int truesize)
> +{
> + skb_add_rx_frag_netmem(skb, i, page_to_netmem(page), off, size,
> + truesize);
> +}
> EXPORT_SYMBOL(skb_add_rx_frag);
>
> void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
> @@ -1904,10 +1912,11 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
>
> /* skb frags point to kernel buffers */
> for (i = 0; i < new_frags - 1; i++) {
> - __skb_fill_page_desc(skb, i, head, 0, psize);
> + __skb_fill_netmem_desc(skb, i, page_to_netmem(head), 0, psize);
> head = (struct page *)page_private(head);
> }
> - __skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off);
> + __skb_fill_netmem_desc(skb, new_frags - 1, page_to_netmem(head), 0,
> + d_off);
> skb_shinfo(skb)->nr_frags = new_frags;
>
> release:
> @@ -3645,7 +3654,8 @@ skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen)
> if (plen) {
> page = virt_to_head_page(from->head);
> offset = from->data - (unsigned char *)page_address(page);
> - __skb_fill_page_desc(to, 0, page, offset, plen);
> + __skb_fill_netmem_desc(to, 0, page_to_netmem(page),
> + offset, plen);
> get_page(page);
> j = 1;
> len -= plen;
> diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
> index 65d1f6755f98..3180a54b2c68 100644
> --- a/net/kcm/kcmsock.c
> +++ b/net/kcm/kcmsock.c
> @@ -636,9 +636,15 @@ static int kcm_write_msgs(struct kcm_sock *kcm)
> for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
> msize += skb_shinfo(skb)->frags[i].bv_len;
>
> + /* The cast to struct bio_vec* here assumes the frags are
> + * struct page based. WARN if there is no page in this skb.
> + */
> + DEBUG_NET_WARN_ON_ONCE(
> + !skb_frag_page(&skb_shinfo(skb)->frags[0]));
> +

It would be unsafe to continue the operation in this case. Even though
we should never get here, test and exit in all codepaths, similar to
other test above?

if (WARN_ON(!skb_shinfo(skb)->nr_frags)) {
ret = -EINVAL;
goto out;
}

> iov_iter_bvec(&msg.msg_iter, ITER_SOURCE,
> - skb_shinfo(skb)->frags, skb_shinfo(skb)->nr_frags,
> - msize);
> + (const struct bio_vec *)skb_shinfo(skb)->frags,
> + skb_shinfo(skb)->nr_frags, msize);
> iov_iter_advance(&msg.msg_iter, txm->frag_offset);
>
> do {
> --
> 2.43.0.472.g3155946c3a-goog
>