[PATCH net-next v6 06/18] net: Add a function to splice pages into an skbuff for MSG_SPLICE_PAGES

From: David Howells
Date: Tue Apr 11 2023 - 12:11:26 EST


Add a function to handle MSG_SPLICE_PAGES being passed internally to
sendmsg(). Pages are spliced into the given socket buffer if possible and
copied in if not (ie. they're slab pages or have a zero refcount).

Signed-off-by: David Howells <dhowells@xxxxxxxxxx>
cc: Eric Dumazet <edumazet@xxxxxxxxxx>
cc: "David S. Miller" <davem@xxxxxxxxxxxxx>
cc: David Ahern <dsahern@xxxxxxxxxx>
cc: Jakub Kicinski <kuba@xxxxxxxxxx>
cc: Paolo Abeni <pabeni@xxxxxxxxxx>
cc: Jens Axboe <axboe@xxxxxxxxx>
cc: Matthew Wilcox <willy@xxxxxxxxxxxxx>
cc: netdev@xxxxxxxxxxxxxxx
---
include/linux/skbuff.h | 3 ++
net/core/skbuff.c | 110 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 113 insertions(+)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6e508274d2a5..add43417b798 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -5070,5 +5070,8 @@ static inline void skb_mark_for_recycle(struct sk_buff *skb)
}
#endif

+ssize_t skb_splice_from_iter(struct sk_buff *skb, struct iov_iter *iter,
+ ssize_t maxsize, gfp_t gfp);
+
#endif /* __KERNEL__ */
#endif /* _LINUX_SKBUFF_H */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index d96175f58ca4..c90fc48a63a5 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -6838,3 +6838,113 @@ nodefer: __kfree_skb(skb);
if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1))
smp_call_function_single_async(cpu, &sd->defer_csd);
}
+
+static void skb_splice_csum_page(struct sk_buff *skb, struct page *page,
+ size_t offset, size_t len)
+{
+ const char *kaddr;
+ __wsum csum;
+
+ kaddr = kmap_local_page(page);
+ csum = csum_partial(kaddr + offset, len, 0);
+ kunmap_local(kaddr);
+ skb->csum = csum_block_add(skb->csum, csum, skb->len);
+}
+
+/**
+ * skb_splice_from_iter - Splice (or copy) pages to skbuff
+ * @skb: The buffer to add pages to
+ * @iter: Iterator representing the pages to be added
+ * @maxsize: Maximum amount of pages to be added
+ * @gfp: Allocation flags
+ *
+ * This is a common helper function for supporting MSG_SPLICE_PAGES. It
+ * extracts pages from an iterator and adds them to the socket buffer if
+ * possible, copying them to fragments if not possible (such as if they're slab
+ * pages).
+ *
+ * Returns the amount of data spliced/copied or -EMSGSIZE if there's
+ * insufficient space in the buffer to transfer anything.
+ */
+ssize_t skb_splice_from_iter(struct sk_buff *skb, struct iov_iter *iter,
+ ssize_t maxsize, gfp_t gfp)
+{
+ struct page *pages[8], **ppages = pages;
+ unsigned int i;
+ ssize_t spliced = 0, ret = 0;
+ size_t frag_limit = READ_ONCE(sysctl_max_skb_frags);
+
+ while (iter->count > 0) {
+ ssize_t space, nr;
+ size_t off, len;
+
+ ret = -EMSGSIZE;
+ space = frag_limit - skb_shinfo(skb)->nr_frags;
+ if (space < 0)
+ break;
+
+ /* We might be able to coalesce without increasing nr_frags */
+ nr = clamp_t(size_t, space, 1, ARRAY_SIZE(pages));
+
+ len = iov_iter_extract_pages(iter, &ppages, maxsize, nr, 0, &off);
+ if (len <= 0) {
+ ret = len ?: -EIO;
+ break;
+ }
+
+ if (space == 0 &&
+ !skb_can_coalesce(skb, skb_shinfo(skb)->nr_frags,
+ pages[0], off)) {
+ iov_iter_revert(iter, len);
+ break;
+ }
+
+ i = 0;
+ do {
+ struct page *page = pages[i++];
+ size_t part = min_t(size_t, PAGE_SIZE - off, len);
+ bool put = false;
+
+ if (!sendpage_ok(page)) {
+ const void *p = kmap_local_page(page);
+ void *q;
+
+ q = page_frag_memdup(NULL, p + off, part, gfp,
+ ULONG_MAX);
+ kunmap_local(p);
+ if (!q) {
+ iov_iter_revert(iter, len);
+ ret = -ENOMEM;
+ goto out;
+ }
+ page = virt_to_page(q);
+ off = offset_in_page(q);
+ put = true;
+ }
+
+ ret = skb_append_pagefrags(skb, page, off, part,
+ frag_limit);
+ if (put)
+ put_page(page);
+ if (ret < 0) {
+ iov_iter_revert(iter, len);
+ goto out;
+ }
+
+ if (skb->ip_summed == CHECKSUM_NONE)
+ skb_splice_csum_page(skb, page, off, part);
+
+ off = 0;
+ spliced += part;
+ maxsize -= part;
+ len -= part;
+ } while (len > 0);
+
+ if (maxsize <= 0)
+ break;
+ }
+
+out:
+ skb_len_add(skb, spliced);
+ return spliced ?: ret;
+}