[PATCH] net: Allow custom iter handler in uarg
From: David Ahern
Date: Tue Apr 19 2022 - 12:39:59 EST
Add support for custom iov_iter handling to ubuf_info. The idea is that
in-kernel subsystems want control over how a scatter-gather (SG) list is split.
The custom iterator handler shares a union with mmpin to keep
sizeof(struct ubuf_info) <= sizeof(skb->cb), which is 48 bytes.
Signed-off-by: David Ahern <dsahern@xxxxxxxxxx>
---
include/linux/skbuff.h | 21 ++++++++++++++++-----
net/core/datagram.c | 11 ++++++++---
net/core/datagram.h | 3 ++-
net/core/skbuff.c | 19 +++++++++++++++----
net/ipv4/ip_output.c | 2 +-
net/ipv6/ip6_output.c | 2 +-
6 files changed, 43 insertions(+), 15 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index dbf820a50a39..71161f65dedd 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -482,11 +482,21 @@ struct ubuf_info {
};
refcount_t refcnt;
u8 flags;
+ u8 has_sg_from_iter;
- struct mmpin {
- struct user_struct *user;
- unsigned int num_pg;
- } mmp;
+ /* sg_from_iter is expected to be used with ubuf in
+ * msghdr and is only referenced at the transport
+ * layer segmenting an iov into packets. mmpin is used
+ * by in-tree ubuf_info {re,}alloc at L3 layer.
+ */
+ union {
+ int (*sg_from_iter)(struct sock *sk, struct sk_buff *skb,
+ struct iov_iter *from, size_t length);
+ struct mmpin {
+ struct user_struct *user;
+ unsigned int num_pg;
+ } mmp;
+ };
};
#define skb_uarg(SKB) ((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg))
@@ -503,7 +513,8 @@ void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref);
void msg_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg,
bool success);
-int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len);
+int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len,
+ struct ubuf_info *uarg);
int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
struct msghdr *msg, int len,
struct ubuf_info *uarg);
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 15ab9ffb27fe..9ca61a0a400d 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -617,10 +617,15 @@ int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
EXPORT_SYMBOL(skb_copy_datagram_from_iter);
int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
- struct iov_iter *from, size_t length)
+ struct iov_iter *from, size_t length,
+ struct ubuf_info *uarg)
{
- int frag = skb_shinfo(skb)->nr_frags;
+ int frag;
+ if (unlikely(uarg && uarg->has_sg_from_iter))
+ return uarg->sg_from_iter(sk, skb, from, length);
+
+ frag = skb_shinfo(skb)->nr_frags;
while (length && iov_iter_count(from)) {
struct page *pages[MAX_SKB_FRAGS];
struct page *last_head = NULL;
@@ -704,7 +709,7 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
if (skb_copy_datagram_from_iter(skb, 0, from, copy))
return -EFAULT;
- return __zerocopy_sg_from_iter(NULL, skb, from, ~0U);
+ return __zerocopy_sg_from_iter(NULL, skb, from, ~0U, NULL);
}
EXPORT_SYMBOL(zerocopy_sg_from_iter);
diff --git a/net/core/datagram.h b/net/core/datagram.h
index bcfb75bfa3b2..65027fcf3322 100644
--- a/net/core/datagram.h
+++ b/net/core/datagram.h
@@ -10,6 +10,7 @@ struct sk_buff;
struct iov_iter;
int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
- struct iov_iter *from, size_t length);
+ struct iov_iter *from, size_t length,
+ struct ubuf_info *uarg);
#endif /* _NET_CORE_DATAGRAM_H_ */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 17b93177a68f..9acb43e5a779 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1158,6 +1158,7 @@ struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size)
BUILD_BUG_ON(sizeof(*uarg) > sizeof(skb->cb));
uarg = (void *)skb->cb;
+ uarg->has_sg_from_iter = 0;
uarg->mmp.user = NULL;
if (mm_account_pinned_pages(&uarg->mmp, size)) {
@@ -1206,6 +1207,12 @@ struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size,
return NULL;
}
+ if (WARN_ON(uarg->has_sg_from_iter)) {
+ uarg->has_sg_from_iter = 0;
+ uarg->mmp.user = NULL;
+ uarg->mmp.num_pg = 0;
+ }
+
next = (u32)atomic_read(&sk->sk_zckey);
if ((u32)(uarg->id + uarg->len) == next) {
if (mm_account_pinned_pages(&uarg->mmp, size))
@@ -1258,7 +1265,10 @@ static void __msg_zerocopy_callback(struct ubuf_info *uarg)
u32 lo, hi;
u16 len;
- mm_unaccount_pinned_pages(&uarg->mmp);
+
+ WARN_ON(uarg->has_sg_from_iter);
+ if (!uarg->has_sg_from_iter)
+ mm_unaccount_pinned_pages(&uarg->mmp);
/* if !len, there was only 1 call, and it was aborted
* so do not queue a completion notification
@@ -1319,9 +1329,10 @@ void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref)
}
EXPORT_SYMBOL_GPL(msg_zerocopy_put_abort);
-int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len)
+int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len,
+ struct ubuf_info *uarg)
{
- return __zerocopy_sg_from_iter(skb->sk, skb, &msg->msg_iter, len);
+ return __zerocopy_sg_from_iter(skb->sk, skb, &msg->msg_iter, len, uarg);
}
EXPORT_SYMBOL_GPL(skb_zerocopy_iter_dgram);
@@ -1339,7 +1350,7 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
if (orig_uarg && uarg != orig_uarg)
return -EEXIST;
- err = __zerocopy_sg_from_iter(sk, skb, &msg->msg_iter, len);
+ err = __zerocopy_sg_from_iter(sk, skb, &msg->msg_iter, len, uarg);
if (err == -EFAULT || (err == -EMSGSIZE && skb->len == orig_len)) {
struct sock *save_sk = skb->sk;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 1b6a64b19c76..1ff403c2dcb0 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1238,7 +1238,7 @@ static int __ip_append_data(struct sock *sk,
skb->truesize += copy;
wmem_alloc_delta += copy;
} else {
- err = skb_zerocopy_iter_dgram(skb, from, copy);
+ err = skb_zerocopy_iter_dgram(skb, from, copy, uarg);
if (err < 0)
goto error;
}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 63a217128f8b..6795144653ac 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1791,7 +1791,7 @@ static int __ip6_append_data(struct sock *sk,
skb->truesize += copy;
wmem_alloc_delta += copy;
} else {
- err = skb_zerocopy_iter_dgram(skb, from, copy);
+ err = skb_zerocopy_iter_dgram(skb, from, copy, uarg);
if (err < 0)
goto error;
}
--
2.25.1
--1yeeQ81UyVL57Vl7--