[PATCH v3 33/55] kcm: Support MSG_SPLICE_PAGES

From: David Howells
Date: Fri Mar 31 2023 - 12:16:50 EST


Make AF_KCM sendmsg() support MSG_SPLICE_PAGES. This causes pages to be
spliced from the source iterator if possible and copied otherwise.

This allows ->sendpage() to be replaced by something that can handle
multiple multipage folios in a single transaction.

Signed-off-by: David Howells <dhowells@xxxxxxxxxx>
cc: Tom Herbert <tom@xxxxxxxxxxxxxxx>
cc: Tom Herbert <tom@xxxxxxxxxxxxxx>
cc: Jakub Kicinski <kuba@xxxxxxxxxx>
cc: Eric Dumazet <edumazet@xxxxxxxxxx>
cc: "David S. Miller" <davem@xxxxxxxxxxxxx>
cc: Paolo Abeni <pabeni@xxxxxxxxxx>
cc: Jens Axboe <axboe@xxxxxxxxx>
cc: Matthew Wilcox <willy@xxxxxxxxxxxxx>
cc: netdev@xxxxxxxxxxxxxxx
---
net/kcm/kcmsock.c | 89 ++++++++++++++++++++++++++++++++++++++---------
1 file changed, 72 insertions(+), 17 deletions(-)

diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index cfe828bd7fc6..0a3f79d81595 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -989,29 +989,84 @@ static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
merge = false;
}

- copy = min_t(int, msg_data_left(msg),
- pfrag->size - pfrag->offset);
+ if (msg->msg_flags & MSG_SPLICE_PAGES) {
+ struct page *page = NULL, **pages = &page;
+ size_t off;
+ bool put = false;
+
+ err = iov_iter_extract_pages(&msg->msg_iter, &pages,
+ INT_MAX, 1, 0, &off);
+ if (err <= 0) {
+ err = err ?: -EIO;
+ goto out_error;
+ }
+ copy = err;

- if (!sk_wmem_schedule(sk, copy))
- goto wait_for_memory;
+ if (skb_can_coalesce(skb, i, page, off)) {
+ skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
+ goto coalesced;
+ }

- err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb,
- pfrag->page,
- pfrag->offset,
- copy);
- if (err)
- goto out_error;
+ if (!sk_wmem_schedule(sk, copy)) {
+ iov_iter_revert(&msg->msg_iter, copy);
+ goto wait_for_memory;
+ }
+
+ if (!sendpage_ok(page)) {
+ const void *p = kmap_local_page(page);
+ void *q;

- /* Update the skb. */
- if (merge) {
- skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
+ q = page_frag_memdup(NULL, p + off, copy,
+ sk->sk_allocation, ULONG_MAX);
+ kunmap_local(p);
+ if (!q) {
+ iov_iter_revert(&msg->msg_iter, copy);
+ err = -ENOMEM;
+ goto out_error;
+ }
+ page = virt_to_page(q);
+ off = offset_in_page(q);
+ put = true;
+ }
+
+ skb_fill_page_desc_noacc(skb, i, page, off, copy);
+ if (put)
+ put_page(page);
+coalesced:
+ skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG;
+ skb->len += copy;
+ skb->data_len += copy;
+ skb->truesize += copy;
+ sk->sk_wmem_queued += copy;
+ sk_mem_charge(sk, copy);
+
+ if (head != skb)
+ head->truesize += copy;
} else {
- skb_fill_page_desc(skb, i, pfrag->page,
- pfrag->offset, copy);
- get_page(pfrag->page);
+ copy = min_t(int, msg_data_left(msg),
+ pfrag->size - pfrag->offset);
+ if (!sk_wmem_schedule(sk, copy))
+ goto wait_for_memory;
+
+ err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb,
+ pfrag->page,
+ pfrag->offset,
+ copy);
+ if (err)
+ goto out_error;
+
+ /* Update the skb. */
+ if (merge) {
+ skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
+ } else {
+ skb_fill_page_desc(skb, i, pfrag->page,
+ pfrag->offset, copy);
+ get_page(pfrag->page);
+ }
+
+ pfrag->offset += copy;
}

- pfrag->offset += copy;
copied += copy;
if (head != skb) {
head->len += copy;