Re: [PATCH v4 net 3/5] tcp/udp: Call inet6_destroy_sock() in IPv6 sk->sk_destruct().
From: Paolo Abeni
Date: Thu Oct 06 2022 - 05:20:11 EST
On Tue, 2022-10-04 at 10:18 -0700, Kuniyuki Iwashima wrote:
> Originally, inet6_sk(sk)->XXX were changed under lock_sock(), so we were
> able to clean them up by calling inet6_destroy_sock() during the IPv6 ->
> IPv4 conversion by IPV6_ADDRFORM. However, commit 03485f2adcde ("udpv6:
> Add lockless sendmsg() support") added a lockless memory allocation path,
> which could cause a memory leak:
>
> setsockopt(IPV6_ADDRFORM) sendmsg()
> +-----------------------+ +-------+
> - do_ipv6_setsockopt(sk, ...) - udpv6_sendmsg(sk, ...)
> - lock_sock(sk) ^._ called via udpv6_prot
> - WRITE_ONCE(sk->sk_prot, &tcp_prot) before WRITE_ONCE()
> - inet6_destroy_sock()
> - release_sock(sk) - ip6_make_skb(sk, ...)
> ^._ lockless fast path for
> the non-corking case
>
> - __ip6_append_data(sk, ...)
> - ipv6_local_rxpmtu(sk, ...)
> - xchg(&np->rxpmtu, skb)
> ^._ rxpmtu is never freed.
>
> - lock_sock(sk)
>
> For now, rxpmtu is only the case, but not to miss the future change
> and a similar bug fixed in commit e27326009a3d ("net: ping6: Fix
> memleak in ipv6_renew_options()."), let's set a new function to IPv6
> sk->sk_destruct() and call inet6_cleanup_sock() there. Since the
> conversion does not change sk->sk_destruct(), we can guarantee that
> we can clean up IPv6 resources finally.
>
> We can now remove all inet6_destroy_sock() calls from IPv6 protocol
> specific ->destroy() functions, but such changes are invasive to
> backport. So they can be posted as a follow-up later for net-next.
>
> Fixes: 03485f2adcde ("udpv6: Add lockless sendmsg() support")
> Signed-off-by: Kuniyuki Iwashima <kuniyu@xxxxxxxxxx>
> ---
> Cc: Vladislav Yasevich <vyasevic@xxxxxxxxxx>
> ---
> include/net/ipv6.h | 1 +
> include/net/udp.h | 2 +-
> include/net/udplite.h | 8 --------
> net/ipv4/udp.c | 9 ++++++---
> net/ipv4/udplite.c | 8 ++++++++
> net/ipv6/af_inet6.c | 9 ++++++++-
> net/ipv6/udp.c | 15 ++++++++++++++-
> net/ipv6/udp_impl.h | 1 +
> net/ipv6/udplite.c | 9 ++++++++-
> 9 files changed, 47 insertions(+), 15 deletions(-)
>
> diff --git a/include/net/ipv6.h b/include/net/ipv6.h
> index dfa70789b771..e7ec3e8cd52e 100644
> --- a/include/net/ipv6.h
> +++ b/include/net/ipv6.h
> @@ -1179,6 +1179,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info);
> void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu);
>
> void inet6_cleanup_sock(struct sock *sk);
> +void inet6_sock_destruct(struct sock *sk);
> int inet6_release(struct socket *sock);
> int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
> int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
> diff --git a/include/net/udp.h b/include/net/udp.h
> index 5ee88ddf79c3..fee053bcd17c 100644
> --- a/include/net/udp.h
> +++ b/include/net/udp.h
> @@ -247,7 +247,7 @@ static inline bool udp_sk_bound_dev_eq(struct net *net, int bound_dev_if,
> }
>
> /* net/ipv4/udp.c */
> -void udp_destruct_sock(struct sock *sk);
> +void udp_destruct_common(struct sock *sk);
> void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len);
> int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb);
> void udp_skb_destructor(struct sock *sk, struct sk_buff *skb);
> diff --git a/include/net/udplite.h b/include/net/udplite.h
> index 0143b373602e..299c14ce2bb9 100644
> --- a/include/net/udplite.h
> +++ b/include/net/udplite.h
> @@ -25,14 +25,6 @@ static __inline__ int udplite_getfrag(void *from, char *to, int offset,
> return copy_from_iter_full(to, len, &msg->msg_iter) ? 0 : -EFAULT;
> }
>
> -/* Designate sk as UDP-Lite socket */
> -static inline int udplite_sk_init(struct sock *sk)
> -{
> - udp_init_sock(sk);
> - udp_sk(sk)->pcflag = UDPLITE_BIT;
> - return 0;
> -}
> -
> /*
> * Checksumming routines
> */
> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> index 560d9eadeaa5..48adb418e404 100644
> --- a/net/ipv4/udp.c
> +++ b/net/ipv4/udp.c
> @@ -1598,7 +1598,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
> }
> EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb);
>
> -void udp_destruct_sock(struct sock *sk)
> +void udp_destruct_common(struct sock *sk)
> {
> /* reclaim completely the forward allocated memory */
> struct udp_sock *up = udp_sk(sk);
> @@ -1611,10 +1611,14 @@ void udp_destruct_sock(struct sock *sk)
> kfree_skb(skb);
> }
> udp_rmem_release(sk, total, 0, true);
> +}
> +EXPORT_SYMBOL_GPL(udp_destruct_common);
>
> +static void udp_destruct_sock(struct sock *sk)
> +{
> + udp_destruct_common(sk);
> inet_sock_destruct(sk);
> }
> -EXPORT_SYMBOL_GPL(udp_destruct_sock);
>
> int udp_init_sock(struct sock *sk)
> {
> @@ -1622,7 +1626,6 @@ int udp_init_sock(struct sock *sk)
> sk->sk_destruct = udp_destruct_sock;
> return 0;
> }
> -EXPORT_SYMBOL_GPL(udp_init_sock);
>
> void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
> {
> diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
> index 6e08a76ae1e7..4785ac4a8719 100644
> --- a/net/ipv4/udplite.c
> +++ b/net/ipv4/udplite.c
> @@ -17,6 +17,14 @@
> struct udp_table udplite_table __read_mostly;
> EXPORT_SYMBOL(udplite_table);
>
> +/* Designate sk as UDP-Lite socket */
> +static inline int udplite_sk_init(struct sock *sk)
Please avoid the 'inline' specifier in .c files; a plain 'static' function lets the compiler decide whether to inline it.
> +{
> + udp_init_sock(sk);
> + udp_sk(sk)->pcflag = UDPLITE_BIT;
> + return 0;
> +}
> +
> static int udplite_rcv(struct sk_buff *skb)
> {
> return __udp4_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE);
> diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
> index 83b9e432f3df..ce5378b78ec9 100644
> --- a/net/ipv6/af_inet6.c
> +++ b/net/ipv6/af_inet6.c
> @@ -109,6 +109,13 @@ static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
> return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
> }
>
> +void inet6_sock_destruct(struct sock *sk)
> +{
> + inet6_cleanup_sock(sk);
> + inet_sock_destruct(sk);
> +}
> +EXPORT_SYMBOL_GPL(inet6_sock_destruct);
I'm sorry for not noticing this before, but it looks like the above
export is not needed? inet6_sock_destruct() is only used by UDP, which is
built into the same binary (either the kernel or the ipv6 module) as af_inet6.
Cheers,
Paolo