[PATCH net-next v10 16/16] unix: Convert unix_stream_sendpage() to use MSG_SPLICE_PAGES

From: David Howells
Date: Mon May 22 2023 - 08:14:55 EST


Convert unix_stream_sendpage() to use sendmsg() with MSG_SPLICE_PAGES
rather than directly splicing in the pages itself.

This allows ->sendpage() to be replaced by something that can handle
multiple multipage folios in a single transaction.

Signed-off-by: David Howells <dhowells@xxxxxxxxxx>
cc: "David S. Miller" <davem@xxxxxxxxxxxxx>
cc: Eric Dumazet <edumazet@xxxxxxxxxx>
cc: Jakub Kicinski <kuba@xxxxxxxxxx>
cc: Paolo Abeni <pabeni@xxxxxxxxxx>
cc: Kuniyuki Iwashima <kuniyu@xxxxxxxxxx>
cc: Jens Axboe <axboe@xxxxxxxxx>
cc: Matthew Wilcox <willy@xxxxxxxxxxxxx>
cc: netdev@xxxxxxxxxxxxxxx
---

Notes:
ver #10)
- Fix subject to refer to unix_stream_sendpage() not udp_sendpage().

net/unix/af_unix.c | 134 +++------------------------------------------
1 file changed, 7 insertions(+), 127 deletions(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 976bc1c5e11b..115436ce1f8a 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1839,24 +1839,6 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
}
}

-static int maybe_init_creds(struct scm_cookie *scm,
- struct socket *socket,
- const struct sock *other)
-{
- int err;
- struct msghdr msg = { .msg_controllen = 0 };
-
- err = scm_send(socket, &msg, scm, false);
- if (err)
- return err;
-
- if (unix_passcred_enabled(socket, other)) {
- scm->pid = get_pid(task_tgid(current));
- current_uid_gid(&scm->creds.uid, &scm->creds.gid);
- }
- return err;
-}
-
static bool unix_skb_scm_eq(struct sk_buff *skb,
struct scm_cookie *scm)
{
@@ -2292,117 +2274,15 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
int offset, size_t size, int flags)
{
- int err;
- bool send_sigpipe = false;
- bool init_scm = true;
- struct scm_cookie scm;
- struct sock *other, *sk = socket->sk;
- struct sk_buff *skb, *newskb = NULL, *tail = NULL;
-
- if (flags & MSG_OOB)
- return -EOPNOTSUPP;
+ struct bio_vec bvec;
+ struct msghdr msg = { .msg_flags = flags | MSG_SPLICE_PAGES };

- other = unix_peer(sk);
- if (!other || sk->sk_state != TCP_ESTABLISHED)
- return -ENOTCONN;
-
- if (false) {
-alloc_skb:
- unix_state_unlock(other);
- mutex_unlock(&unix_sk(other)->iolock);
- newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
- &err, 0);
- if (!newskb)
- goto err;
- }
-
- /* we must acquire iolock as we modify already present
- * skbs in the sk_receive_queue and mess with skb->len
- */
- err = mutex_lock_interruptible(&unix_sk(other)->iolock);
- if (err) {
- err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
- goto err;
- }
-
- if (sk->sk_shutdown & SEND_SHUTDOWN) {
- err = -EPIPE;
- send_sigpipe = true;
- goto err_unlock;
- }
-
- unix_state_lock(other);
+ if (flags & MSG_SENDPAGE_NOTLAST)
+ msg.msg_flags |= MSG_MORE;

- if (sock_flag(other, SOCK_DEAD) ||
- other->sk_shutdown & RCV_SHUTDOWN) {
- err = -EPIPE;
- send_sigpipe = true;
- goto err_state_unlock;
- }
-
- if (init_scm) {
- err = maybe_init_creds(&scm, socket, other);
- if (err)
- goto err_state_unlock;
- init_scm = false;
- }
-
- skb = skb_peek_tail(&other->sk_receive_queue);
- if (tail && tail == skb) {
- skb = newskb;
- } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
- if (newskb) {
- skb = newskb;
- } else {
- tail = skb;
- goto alloc_skb;
- }
- } else if (newskb) {
- /* this is fast path, we don't necessarily need to
- * call to kfree_skb even though with newskb == NULL
- * this - does no harm
- */
- consume_skb(newskb);
- newskb = NULL;
- }
-
- if (skb_append_pagefrags(skb, page, offset, size, MAX_SKB_FRAGS)) {
- tail = skb;
- goto alloc_skb;
- }
-
- skb->len += size;
- skb->data_len += size;
- skb->truesize += size;
- refcount_add(size, &sk->sk_wmem_alloc);
-
- if (newskb) {
- err = unix_scm_to_skb(&scm, skb, false);
- if (err)
- goto err_state_unlock;
- spin_lock(&other->sk_receive_queue.lock);
- __skb_queue_tail(&other->sk_receive_queue, newskb);
- spin_unlock(&other->sk_receive_queue.lock);
- }
-
- unix_state_unlock(other);
- mutex_unlock(&unix_sk(other)->iolock);
-
- other->sk_data_ready(other);
- scm_destroy(&scm);
- return size;
-
-err_state_unlock:
- unix_state_unlock(other);
-err_unlock:
- mutex_unlock(&unix_sk(other)->iolock);
-err:
- kfree_skb(newskb);
- if (send_sigpipe && !(flags & MSG_NOSIGNAL))
- send_sig(SIGPIPE, current, 0);
- if (!init_scm)
- scm_destroy(&scm);
- return err;
+ bvec_set_page(&bvec, page, size, offset);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
+ return unix_stream_sendmsg(socket, &msg, size);
}

static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,